(Reupping this from a few months back as requested for discussion)

Allow a domain to opt into allocating and maintaining a TTBR1
pagetable.  The TTBR1 region is the same size as the TTBR0
region; its addresses have the sign extension bit (the highest
bit of the virtual address range) and every bit above it set.

For example, given a 36-bit virtual address range shared by
TTBR0 and TTBR1 (bit 35 being the sign extension bit), the
memory map will look like this:

   TTBR0 [0x0000000000000000:0x00000007FFFFFFFF]
   TTBR1 [0xFFFFFFF800000000:0xFFFFFFFFFFFFFFFF]

The map/unmap operations will automatically use the appropriate
pagetable for the given iova.

Signed-off-by: Jordan Crouse <jcro...@codeaurora.org>
---
 drivers/iommu/arm-smmu.c       |  40 +++++++++-
 drivers/iommu/io-pgtable-arm.c | 162 +++++++++++++++++++++++++++++++++++++----
 drivers/iommu/io-pgtable.h     |   5 ++
 include/linux/iommu.h          |   1 +
 4 files changed, 191 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index bc89b4d..9a1f522 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -389,6 +389,8 @@ struct arm_smmu_device {
        struct arm_smmu_s2cr            *s2crs;
        struct mutex                    stream_map_mutex;
 
+       u32                             ubs;
+
        unsigned long                   va_size;
        unsigned long                   ipa_size;
        unsigned long                   pa_size;
@@ -438,6 +440,7 @@ struct arm_smmu_domain {
        struct mutex                    init_mutex; /* Protects smmu pointer */
        spinlock_t                      cb_lock; /* Serialises ATS1* ops */
        struct iommu_domain             domain;
+       u32                             attributes;
 };
 
 struct arm_smmu_option_prop {
@@ -821,7 +824,6 @@ static void arm_smmu_init_context_bank(struct 
arm_smmu_domain *smmu_domain,
                } else {
                        reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
                        reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
-                       reg2 |= TTBCR2_SEP_UPSTREAM;
                        if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
                                reg2 |= TTBCR2_AS;
                }
@@ -881,6 +883,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain 
*domain,
 {
        int irq, start, ret = 0;
        unsigned long ias, oas;
+       int sep = 0;
        struct io_pgtable_ops *pgtbl_ops;
        struct io_pgtable_cfg pgtbl_cfg;
        enum io_pgtable_fmt fmt;
@@ -955,6 +958,25 @@ static int arm_smmu_init_domain_context(struct 
iommu_domain *domain,
                oas = smmu->ipa_size;
                if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
                        fmt = ARM_64_LPAE_S1;
+
+                       if (smmu_domain->attributes &
+                               (1 << DOMAIN_ATTR_ENABLE_TTBR1)) {
+
+                               /*
+                                * When the UBS id is 5 we know that the bus
+                                * size is 49 bits and that bit 48 is the fixed
+                                * sign extension bit.  For any other bus size
+                                * we need to specify the sign extension bit
+                                * and adjust the input size accordingly
+                                */
+
+                               if (smmu->ubs == 5) {
+                                       sep = 48;
+                               } else {
+                                       sep = ias - 1;
+                                       ias--;
+                               }
+                       }
                } else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
                        fmt = ARM_32_LPAE_S1;
                        ias = min(ias, 32UL);
@@ -1014,6 +1036,7 @@ static int arm_smmu_init_domain_context(struct 
iommu_domain *domain,
                .pgsize_bitmap  = smmu->pgsize_bitmap,
                .ias            = ias,
                .oas            = oas,
+               .sep            = sep,
                .tlb            = tlb_ops,
                .iommu_dev      = smmu->dev,
        };
@@ -1021,6 +1044,10 @@ static int arm_smmu_init_domain_context(struct 
iommu_domain *domain,
        if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
                pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
 
+
+       if (smmu_domain->attributes & (1 << DOMAIN_ATTR_ENABLE_TTBR1))
+               pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_TTBR1;
+
        smmu_domain->smmu = smmu;
        pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
        if (!pgtbl_ops) {
@@ -1621,6 +1648,10 @@ static int arm_smmu_domain_get_attr(struct iommu_domain 
*domain,
        case DOMAIN_ATTR_NESTING:
                *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
                return 0;
+       case DOMAIN_ATTR_ENABLE_TTBR1:
+               *((int *)data) = !!(smmu_domain->attributes
+                                       & (1 << DOMAIN_ATTR_ENABLE_TTBR1));
+               return 0;
        default:
                return -ENODEV;
        }
@@ -1650,6 +1681,11 @@ static int arm_smmu_domain_set_attr(struct iommu_domain 
*domain,
                        smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
 
                break;
+       case DOMAIN_ATTR_ENABLE_TTBR1:
+               if (*((int *)data))
+                       smmu_domain->attributes |=
+                               1 << DOMAIN_ATTR_ENABLE_TTBR1;
+               break;
        default:
                ret = -ENODEV;
        }
@@ -1991,7 +2027,8 @@ static int arm_smmu_device_cfg_probe(struct 
arm_smmu_device *smmu)
                if (smmu->version == ARM_SMMU_V1_64K)
                        smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
        } else {
-               size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
-               smmu->va_size = arm_smmu_id_size_to_bits(size);
+               /* Keep the raw UBS id; TTBR1 setup uses it to pick the SEP */
+               smmu->ubs = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
+               smmu->va_size = arm_smmu_id_size_to_bits(smmu->ubs);
                if (id & ID2_PTFS_4K)
                        smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index b182039..ec015b0 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -127,14 +127,21 @@
 #define ARM_LPAE_TCR_TG0_64K           (1 << 14)
 #define ARM_LPAE_TCR_TG0_16K           (2 << 14)
 
+#define ARM_LPAE_TCR_TG1_16K            1ULL
+#define ARM_LPAE_TCR_TG1_4K             2ULL
+#define ARM_LPAE_TCR_TG1_64K            3ULL
+
 #define ARM_LPAE_TCR_SH0_SHIFT         12
 #define ARM_LPAE_TCR_SH0_MASK          0x3
+#define ARM_LPAE_TCR_SH1_SHIFT         28
 #define ARM_LPAE_TCR_SH_NS             0
 #define ARM_LPAE_TCR_SH_OS             2
 #define ARM_LPAE_TCR_SH_IS             3
 
 #define ARM_LPAE_TCR_ORGN0_SHIFT       10
+#define ARM_LPAE_TCR_ORGN1_SHIFT       26
 #define ARM_LPAE_TCR_IRGN0_SHIFT       8
+#define ARM_LPAE_TCR_IRGN1_SHIFT       24
 #define ARM_LPAE_TCR_RGN_MASK          0x3
 #define ARM_LPAE_TCR_RGN_NC            0
 #define ARM_LPAE_TCR_RGN_WBWA          1
@@ -147,6 +154,9 @@
 #define ARM_LPAE_TCR_T0SZ_SHIFT                0
 #define ARM_LPAE_TCR_SZ_MASK           0xf
 
+#define ARM_LPAE_TCR_T1SZ_SHIFT         16
+#define ARM_LPAE_TCR_T1SZ_MASK          0x3f
+
 #define ARM_LPAE_TCR_PS_SHIFT          16
 #define ARM_LPAE_TCR_PS_MASK           0x7
 
@@ -160,6 +170,19 @@
 #define ARM_LPAE_TCR_PS_44_BIT         0x4ULL
 #define ARM_LPAE_TCR_PS_48_BIT         0x5ULL
 
+#define ARM_LPAE_TCR_EPD1_SHIFT                23
+#define ARM_LPAE_TCR_EPD1_FAULT                1
+
+#define ARM_LPAE_TCR_SEP_SHIFT         (15 + 32)
+
+#define ARM_LPAE_TCR_SEP_31            0ULL
+#define ARM_LPAE_TCR_SEP_35            1ULL
+#define ARM_LPAE_TCR_SEP_39            2ULL
+#define ARM_LPAE_TCR_SEP_41            3ULL
+#define ARM_LPAE_TCR_SEP_43            4ULL
+#define ARM_LPAE_TCR_SEP_47            5ULL
+#define ARM_LPAE_TCR_SEP_UPSTREAM      7ULL
+
 #define ARM_LPAE_MAIR_ATTR_SHIFT(n)    ((n) << 3)
 #define ARM_LPAE_MAIR_ATTR_MASK                0xff
 #define ARM_LPAE_MAIR_ATTR_DEVICE      0x04
@@ -198,7 +221,7 @@ struct arm_lpae_io_pgtable {
        unsigned long           pg_shift;
        unsigned long           bits_per_level;
 
-       void                    *pgd;
+       void                    *pgd[2];
 };
 
 typedef u64 arm_lpae_iopte;
@@ -440,14 +463,35 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct 
arm_lpae_io_pgtable *data,
        return pte;
 }
 
+static inline arm_lpae_iopte *arm_lpae_get_table(
+               struct arm_lpae_io_pgtable *data, unsigned long iova)
+{
+       struct io_pgtable_cfg *cfg = &data->iop.cfg;
+
+       /*
+        * With the TTBR1 quirk, an iova with every bit in [cfg->sep:cfg->ias]
+        * set belongs to the TTBR1 region and uses pgd[1]; others use pgd[0]
+        */
+       if (unlikely(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) {
+               unsigned long mask = GENMASK(cfg->sep, cfg->ias);
+
+               if ((iova & mask) == mask)
+                       return data->pgd[1];
+       }
+
+       return data->pgd[0];
+}
+
 static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
                        phys_addr_t paddr, size_t size, int iommu_prot)
 {
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
-       arm_lpae_iopte *ptep = data->pgd;
+       arm_lpae_iopte *ptep;
        int ret, lvl = ARM_LPAE_START_LVL(data);
        arm_lpae_iopte prot;
 
+       ptep = arm_lpae_get_table(data, iova);
+
        /* If no access, then nothing to do */
        if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
                return 0;
@@ -498,7 +542,10 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop)
 {
        struct arm_lpae_io_pgtable *data = io_pgtable_to_data(iop);
 
-       __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data), data->pgd);
+       __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data), data->pgd[0]);
+       if (data->pgd[1])
+               __arm_lpae_free_pgtable(data, ARM_LPAE_START_LVL(data),
+                       data->pgd[1]);
        kfree(data);
 }
 
@@ -607,8 +654,10 @@ static int arm_lpae_unmap(struct io_pgtable_ops *ops, 
unsigned long iova,
 {
        size_t unmapped;
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
-       arm_lpae_iopte *ptep = data->pgd;
        int lvl = ARM_LPAE_START_LVL(data);
+       arm_lpae_iopte *ptep;
+
+       ptep = arm_lpae_get_table(data, iova);
 
        unmapped = __arm_lpae_unmap(data, iova, size, lvl, ptep);
        if (unmapped)
@@ -621,8 +670,10 @@ static phys_addr_t arm_lpae_iova_to_phys(struct 
io_pgtable_ops *ops,
                                         unsigned long iova)
 {
        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
-       arm_lpae_iopte pte, *ptep = data->pgd;
        int lvl = ARM_LPAE_START_LVL(data);
+       arm_lpae_iopte pte, *ptep;
+
+       ptep = arm_lpae_get_table(data, iova);
 
        do {
                /* Valid IOPTE pointer? */
@@ -733,6 +784,71 @@ static void arm_lpae_restrict_pgsizes(struct 
io_pgtable_cfg *cfg)
        return data;
 }
 
+static u64 arm64_lpae_setup_ttbr1(struct io_pgtable_cfg *cfg,
+               struct arm_lpae_io_pgtable *data)
+
+{
+       u64 reg;
+
+       /* TTBR1 disabled: EPD1 blocks walks through it, SEP is upstream */
+       if (!(cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)) {
+               reg = ARM_LPAE_TCR_EPD1;
+               reg |= (ARM_LPAE_TCR_SEP_UPSTREAM << ARM_LPAE_TCR_SEP_SHIFT);
+               return reg;
+       }
+
+       if (cfg->iommu_dev && cfg->iommu_dev->archdata.dma_coherent)
+               reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH1_SHIFT) |
+                       (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_IRGN1_SHIFT) |
+                       (ARM_LPAE_TCR_RGN_WBWA << ARM_LPAE_TCR_ORGN1_SHIFT);
+       else
+               reg = (ARM_LPAE_TCR_SH_OS << ARM_LPAE_TCR_SH1_SHIFT) |
+                       (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_IRGN1_SHIFT) |
+                       (ARM_LPAE_TCR_RGN_NC << ARM_LPAE_TCR_ORGN1_SHIFT);
+
+       switch (1 << data->pg_shift) {
+       case SZ_4K:
+               reg |= (ARM_LPAE_TCR_TG1_4K << 30);
+               break;
+       case SZ_16K:
+               reg |= (ARM_LPAE_TCR_TG1_16K << 30);
+               break;
+       case SZ_64K:
+               reg |= (ARM_LPAE_TCR_TG1_64K << 30);
+               break;
+       }
+
+       /* T1SZ is 64 - ias, the same value programmed for T0SZ */
+       reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T1SZ_SHIFT;
+
+       switch (cfg->sep) {
+       case 31:
+               reg |= (ARM_LPAE_TCR_SEP_31 << ARM_LPAE_TCR_SEP_SHIFT);
+               break;
+       case 35:
+               reg |= (ARM_LPAE_TCR_SEP_35 << ARM_LPAE_TCR_SEP_SHIFT);
+               break;
+       case 39:
+               reg |= (ARM_LPAE_TCR_SEP_39 << ARM_LPAE_TCR_SEP_SHIFT);
+               break;
+       case 41:
+               reg |= (ARM_LPAE_TCR_SEP_41 << ARM_LPAE_TCR_SEP_SHIFT);
+               break;
+       case 43:
+               reg |= (ARM_LPAE_TCR_SEP_43 << ARM_LPAE_TCR_SEP_SHIFT);
+               break;
+       case 47:
+               reg |= (ARM_LPAE_TCR_SEP_47 << ARM_LPAE_TCR_SEP_SHIFT);
+               break;
+       case 48:
+       default:
+               reg |= (ARM_LPAE_TCR_SEP_UPSTREAM << ARM_LPAE_TCR_SEP_SHIFT);
+               break;
+       }
+
+       return reg;
+}
+
 static struct io_pgtable *
 arm_64_lpae_alloc_pgtable_s1(struct io_pgtable_cfg *cfg, void *cookie)
 {
@@ -788,8 +904,9 @@ static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg 
*cfg)
 
        reg |= (64ULL - cfg->ias) << ARM_LPAE_TCR_T0SZ_SHIFT;
 
-       /* Disable speculative walks through TTBR1 */
-       reg |= ARM_LPAE_TCR_EPD1;
+       /* Bring in the TTBR1 configuration */
+       reg |= arm64_lpae_setup_ttbr1(cfg, data);
+
        cfg->arm_lpae_s1_cfg.tcr = reg;
 
        /* MAIRs */
@@ -804,16 +921,31 @@ static void arm_lpae_restrict_pgsizes(struct 
io_pgtable_cfg *cfg)
        cfg->arm_lpae_s1_cfg.mair[1] = 0;
 
        /* Looking good; allocate a pgd */
-       data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
-       if (!data->pgd)
+       data->pgd[0] = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+       if (!data->pgd[0])
                goto out_free_data;
 
+       if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
+               data->pgd[1] = __arm_lpae_alloc_pages(data->pgd_size,
+                       GFP_KERNEL, cfg);
+               if (!data->pgd[1]) {
+                       __arm_lpae_free_pages(data->pgd[0], data->pgd_size,
+                               cfg);
+                       goto out_free_data;
+               }
+       } else {
+               data->pgd[1] = NULL;
+       }
+
        /* Ensure the empty pgd is visible before any actual TTBR write */
        wmb();
 
        /* TTBRs */
-       cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd);
-       cfg->arm_lpae_s1_cfg.ttbr[1] = 0;
+       cfg->arm_lpae_s1_cfg.ttbr[0] = virt_to_phys(data->pgd[0]);
+
+       /* ttbr[1] stays explicitly zero when no TTBR1 table was allocated */
+       cfg->arm_lpae_s1_cfg.ttbr[1] =
+               data->pgd[1] ? virt_to_phys(data->pgd[1]) : 0;
        return &data->iop;
 
 out_free_data:
@@ -898,15 +1030,15 @@ static void arm_lpae_restrict_pgsizes(struct 
io_pgtable_cfg *cfg)
        cfg->arm_lpae_s2_cfg.vtcr = reg;
 
        /* Allocate pgd pages */
-       data->pgd = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
-       if (!data->pgd)
+       data->pgd[0] = __arm_lpae_alloc_pages(data->pgd_size, GFP_KERNEL, cfg);
+       if (!data->pgd[0])
                goto out_free_data;
 
        /* Ensure the empty pgd is visible before any actual TTBR write */
        wmb();
 
        /* VTTBR */
-       cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd);
+       cfg->arm_lpae_s2_cfg.vttbr = virt_to_phys(data->pgd[0]);
        return &data->iop;
 
 out_free_data:
@@ -1004,7 +1136,7 @@ static void __init arm_lpae_dump_ops(struct 
io_pgtable_ops *ops)
                cfg->pgsize_bitmap, cfg->ias);
        pr_err("data: %d levels, 0x%zx pgd_size, %lu pg_shift, %lu 
bits_per_level, pgd @ %p\n",
                data->levels, data->pgd_size, data->pg_shift,
-               data->bits_per_level, data->pgd);
+               data->bits_per_level, data->pgd[0]);
 }
 
 #define __FAIL(ops, i) ({                                              \
diff --git a/drivers/iommu/io-pgtable.h b/drivers/iommu/io-pgtable.h
index 524263a..efcf39a 100644
--- a/drivers/iommu/io-pgtable.h
+++ b/drivers/iommu/io-pgtable.h
@@ -70,16 +70,21 @@ struct io_pgtable_cfg {
         *      be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
         *      software-emulated IOMMU), such that pagetable updates need not
         *      be treated as explicit DMA data.
+        *
+        * IO_PGTABLE_QUIRK_ARM_TTBR1: Allocate and configure a supervisor
+        *      (TTBR1) pagetable as well as the user (TTBR0) table
         */
        #define IO_PGTABLE_QUIRK_ARM_NS         BIT(0)
        #define IO_PGTABLE_QUIRK_NO_PERMS       BIT(1)
        #define IO_PGTABLE_QUIRK_TLBI_ON_MAP    BIT(2)
        #define IO_PGTABLE_QUIRK_ARM_MTK_4GB    BIT(3)
        #define IO_PGTABLE_QUIRK_NO_DMA         BIT(4)
+       #define IO_PGTABLE_QUIRK_ARM_TTBR1      BIT(5)   /* Allocate TTBR1 PT */
        unsigned long                   quirks;
        unsigned long                   pgsize_bitmap;
        unsigned int                    ias;
        unsigned int                    oas;
+       int                             sep;
        const struct iommu_gather_ops   *tlb;
        struct device                   *iommu_dev;
 
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 2cb54ad..e91d054 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -124,6 +124,7 @@ enum iommu_attr {
        DOMAIN_ATTR_FSL_PAMU_ENABLE,
        DOMAIN_ATTR_FSL_PAMUV1,
        DOMAIN_ATTR_NESTING,    /* two stages of translation */
+       DOMAIN_ATTR_ENABLE_TTBR1,
        DOMAIN_ATTR_MAX,
 };
 
-- 
1.9.1

_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno

Reply via email to