[PATCH 2/3] bus: fsl-mc: dpio: enable qbman CENA portal memory access

2017-04-20 Thread Haiying Wang
Once we enable the cacheable portal memory, we need to do
cache flush for enqueue, vdq, buffer release, and management
commands, as well as invalidate and prefetch for the valid bit
of management command response and next index of dqrr.

Signed-off-by: Haiying Wang <haiying.w...@nxp.com>
---
 drivers/staging/fsl-mc/bus/dpio/qbman-portal.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/fsl-mc/bus/dpio/qbman-portal.c 
b/drivers/staging/fsl-mc/bus/dpio/qbman-portal.c
index 2a3ea29..e16121c 100644
--- a/drivers/staging/fsl-mc/bus/dpio/qbman-portal.c
+++ b/drivers/staging/fsl-mc/bus/dpio/qbman-portal.c
@@ -99,6 +99,14 @@ enum qbman_sdqcr_fc {
qbman_sdqcr_fc_up_to_3 = 1
 };
 
+#define dccvac(p) { asm volatile("dc cvac, %0;" : : "r" (p) : "memory"); }
+#define dcivac(p) { asm volatile("dc ivac, %0" : : "r"(p) : "memory"); }
+static inline void qbman_inval_prefetch(struct qbman_swp *p, uint32_t offset)
+{
+   dcivac(p->addr_cena + offset);
+   prefetch(p->addr_cena + offset);
+}
+
 /* Portal Access */
 
 static inline u32 qbman_read_register(struct qbman_swp *p, u32 offset)
@@ -189,7 +197,7 @@ struct qbman_swp *qbman_swp_init(const struct 
qbman_swp_desc *d)
p->addr_cinh = d->cinh_bar;
 
reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
-   1, /* Writes Non-cacheable */
+   0, /* Writes cacheable */
0, /* EQCR_CI stashing threshold */
3, /* RPM: Valid bit mode, RCR in array mode */
2, /* DCM: Discrete consumption ack mode */
@@ -315,6 +323,7 @@ void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, u8 
cmd_verb)
 
dma_wmb();
*v = cmd_verb | p->mc.valid_bit;
+   dccvac(cmd);
 }
 
 /*
@@ -325,6 +334,7 @@ void *qbman_swp_mc_result(struct qbman_swp *p)
 {
u32 *ret, verb;
 
+   qbman_inval_prefetch(p, QBMAN_CENA_SWP_RR(p->mc.valid_bit));
ret = qbman_get_cmd(p, QBMAN_CENA_SWP_RR(p->mc.valid_bit));
 
/* Remove the valid-bit - command completed if the rest is non-zero */
@@ -435,6 +445,7 @@ int qbman_swp_enqueue(struct qbman_swp *s, const struct 
qbman_eq_desc *d,
/* Set the verb byte, have to substitute in the valid-bit */
dma_wmb();
p->verb = d->verb | EQAR_VB(eqar);
+   dccvac(p);
 
return 0;
 }
@@ -627,6 +638,7 @@ int qbman_swp_pull(struct qbman_swp *s, struct 
qbman_pull_desc *d)
/* Set the verb byte, have to substitute in the valid-bit */
p->verb = d->verb | s->vdq.valid_bit;
s->vdq.valid_bit ^= QB_VALID_BIT;
+   dccvac(p);
 
return 0;
 }
@@ -680,8 +692,7 @@ const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp 
*s)
 s->dqrr.next_idx, pi);
s->dqrr.reset_bug = 0;
}
-   prefetch(qbman_get_cmd(s,
-  QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)));
+   qbman_inval_prefetch(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
}
 
p = qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
@@ -696,8 +707,7 @@ const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp 
*s)
 * knew from reading PI.
 */
if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit) {
-   prefetch(qbman_get_cmd(s,
-  QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)));
+   qbman_inval_prefetch(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
return NULL;
}
/*
@@ -720,7 +730,7 @@ const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp 
*s)
(flags & DPAA2_DQ_STAT_EXPIRED))
atomic_inc(&s->vdq.available);
 
-   prefetch(qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)));
+   qbman_inval_prefetch(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
 
return p;
 }
@@ -848,6 +858,7 @@ int qbman_swp_release(struct qbman_swp *s, const struct 
qbman_release_desc *d,
 */
dma_wmb();
p->verb = d->verb | RAR_VB(rar) | num_buffers;
+   dccvac(p);
 
return 0;
 }
-- 
2.7.4



[PATCH 2/3] bus: fsl-mc: dpio: enable qbman CENA portal memory access

2017-04-20 Thread Haiying Wang
Once we enable the cacheable portal memory, we need to do
cache flush for enqueue, vdq, buffer release, and management
commands, as well as invalidate and prefetch for the valid bit
of management command response and next index of dqrr.

Signed-off-by: Haiying Wang 
---
 drivers/staging/fsl-mc/bus/dpio/qbman-portal.c | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/fsl-mc/bus/dpio/qbman-portal.c 
b/drivers/staging/fsl-mc/bus/dpio/qbman-portal.c
index 2a3ea29..e16121c 100644
--- a/drivers/staging/fsl-mc/bus/dpio/qbman-portal.c
+++ b/drivers/staging/fsl-mc/bus/dpio/qbman-portal.c
@@ -99,6 +99,14 @@ enum qbman_sdqcr_fc {
qbman_sdqcr_fc_up_to_3 = 1
 };
 
+#define dccvac(p) { asm volatile("dc cvac, %0;" : : "r" (p) : "memory"); }
+#define dcivac(p) { asm volatile("dc ivac, %0" : : "r"(p) : "memory"); }
+static inline void qbman_inval_prefetch(struct qbman_swp *p, uint32_t offset)
+{
+   dcivac(p->addr_cena + offset);
+   prefetch(p->addr_cena + offset);
+}
+
 /* Portal Access */
 
 static inline u32 qbman_read_register(struct qbman_swp *p, u32 offset)
@@ -189,7 +197,7 @@ struct qbman_swp *qbman_swp_init(const struct 
qbman_swp_desc *d)
p->addr_cinh = d->cinh_bar;
 
reg = qbman_set_swp_cfg(p->dqrr.dqrr_size,
-   1, /* Writes Non-cacheable */
+   0, /* Writes cacheable */
0, /* EQCR_CI stashing threshold */
3, /* RPM: Valid bit mode, RCR in array mode */
2, /* DCM: Discrete consumption ack mode */
@@ -315,6 +323,7 @@ void qbman_swp_mc_submit(struct qbman_swp *p, void *cmd, u8 
cmd_verb)
 
dma_wmb();
*v = cmd_verb | p->mc.valid_bit;
+   dccvac(cmd);
 }
 
 /*
@@ -325,6 +334,7 @@ void *qbman_swp_mc_result(struct qbman_swp *p)
 {
u32 *ret, verb;
 
+   qbman_inval_prefetch(p, QBMAN_CENA_SWP_RR(p->mc.valid_bit));
ret = qbman_get_cmd(p, QBMAN_CENA_SWP_RR(p->mc.valid_bit));
 
/* Remove the valid-bit - command completed if the rest is non-zero */
@@ -435,6 +445,7 @@ int qbman_swp_enqueue(struct qbman_swp *s, const struct 
qbman_eq_desc *d,
/* Set the verb byte, have to substitute in the valid-bit */
dma_wmb();
p->verb = d->verb | EQAR_VB(eqar);
+   dccvac(p);
 
return 0;
 }
@@ -627,6 +638,7 @@ int qbman_swp_pull(struct qbman_swp *s, struct 
qbman_pull_desc *d)
/* Set the verb byte, have to substitute in the valid-bit */
p->verb = d->verb | s->vdq.valid_bit;
s->vdq.valid_bit ^= QB_VALID_BIT;
+   dccvac(p);
 
return 0;
 }
@@ -680,8 +692,7 @@ const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp 
*s)
 s->dqrr.next_idx, pi);
s->dqrr.reset_bug = 0;
}
-   prefetch(qbman_get_cmd(s,
-  QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)));
+   qbman_inval_prefetch(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
}
 
p = qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
@@ -696,8 +707,7 @@ const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp 
*s)
 * knew from reading PI.
 */
if ((verb & QB_VALID_BIT) != s->dqrr.valid_bit) {
-   prefetch(qbman_get_cmd(s,
-  QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)));
+   qbman_inval_prefetch(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
return NULL;
}
/*
@@ -720,7 +730,7 @@ const struct dpaa2_dq *qbman_swp_dqrr_next(struct qbman_swp 
*s)
(flags & DPAA2_DQ_STAT_EXPIRED))
atomic_inc(&s->vdq.available);
 
-   prefetch(qbman_get_cmd(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx)));
+   qbman_inval_prefetch(s, QBMAN_CENA_SWP_DQRR(s->dqrr.next_idx));
 
return p;
 }
@@ -848,6 +858,7 @@ int qbman_swp_release(struct qbman_swp *s, const struct 
qbman_release_desc *d,
 */
dma_wmb();
p->verb = d->verb | RAR_VB(rar) | num_buffers;
+   dccvac(p);
 
return 0;
 }
-- 
2.7.4



[PATCH 1/3] arm64: extend ioremap for cacheable non-shareable memory

2017-04-20 Thread Haiying Wang
NXP arm64 based SoC needs to allocate cacheable and
non-shareable memory for the software portals of
Queue manager, so we extend the arm64 ioremap support
for this memory attribute.

Signed-off-by: Haiying Wang <haiying.w...@nxp.com>
---
 arch/arm64/include/asm/io.h   | 1 +
 arch/arm64/include/asm/pgtable-prot.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 0c00c87..b6f03e7 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -170,6 +170,7 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, 
size_t size);
 #define ioremap_nocache(addr, size) __ioremap((addr), (size), 
__pgprot(PROT_DEVICE_nGnRE))
 #define ioremap_wc(addr, size) __ioremap((addr), (size), 
__pgprot(PROT_NORMAL_NC))
 #define ioremap_wt(addr, size) __ioremap((addr), (size), 
__pgprot(PROT_DEVICE_nGnRE))
+#define ioremap_cache_ns(addr, size)   __ioremap((addr), (size), 
__pgprot(PROT_NORMAL_NS))
 #define iounmap __iounmap
 
 /*
diff --git a/arch/arm64/include/asm/pgtable-prot.h 
b/arch/arm64/include/asm/pgtable-prot.h
index 2142c77..7fc7910 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -42,6 +42,7 @@
 #define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | 
PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
 #define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | 
PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
 #define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | 
PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_NORMAL_NS (PTE_TYPE_PAGE | PTE_AF | PTE_PXN | PTE_UXN | 
PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | 
PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL   (PROT_SECT_DEFAULT | PMD_SECT_PXN | 
PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-- 
2.7.4



[PATCH 1/3] arm64: extend ioremap for cacheable non-shareable memory

2017-04-20 Thread Haiying Wang
NXP arm64 based SoC needs to allocate cacheable and
non-shareable memory for the software portals of
Queue manager, so we extend the arm64 ioremap support
for this memory attribute.

Signed-off-by: Haiying Wang 
---
 arch/arm64/include/asm/io.h   | 1 +
 arch/arm64/include/asm/pgtable-prot.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 0c00c87..b6f03e7 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -170,6 +170,7 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, 
size_t size);
 #define ioremap_nocache(addr, size) __ioremap((addr), (size), 
__pgprot(PROT_DEVICE_nGnRE))
 #define ioremap_wc(addr, size) __ioremap((addr), (size), 
__pgprot(PROT_NORMAL_NC))
 #define ioremap_wt(addr, size) __ioremap((addr), (size), 
__pgprot(PROT_DEVICE_nGnRE))
+#define ioremap_cache_ns(addr, size)   __ioremap((addr), (size), 
__pgprot(PROT_NORMAL_NS))
 #define iounmap __iounmap
 
 /*
diff --git a/arch/arm64/include/asm/pgtable-prot.h 
b/arch/arm64/include/asm/pgtable-prot.h
index 2142c77..7fc7910 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -42,6 +42,7 @@
 #define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | 
PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_NC))
 #define PROT_NORMAL_WT (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | 
PTE_WRITE | PTE_ATTRINDX(MT_NORMAL_WT))
 #define PROT_NORMAL (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_DIRTY | 
PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
+#define PROT_NORMAL_NS (PTE_TYPE_PAGE | PTE_AF | PTE_PXN | PTE_UXN | 
PTE_DIRTY | PTE_WRITE | PTE_ATTRINDX(MT_NORMAL))
 
 #define PROT_SECT_DEVICE_nGnRE (PROT_SECT_DEFAULT | PMD_SECT_PXN | 
PMD_SECT_UXN | PMD_ATTRINDX(MT_DEVICE_nGnRE))
 #define PROT_SECT_NORMAL   (PROT_SECT_DEFAULT | PMD_SECT_PXN | 
PMD_SECT_UXN | PMD_ATTRINDX(MT_NORMAL))
-- 
2.7.4



[PATCH 3/3] bus: fsl-mc: dpio: change CENA regs to be cacheable

2017-04-20 Thread Haiying Wang
plus non-shareable to meet the performance requirement.
QMan's CENA region contains registers and structures that
are 64 bytes in size and are intended to be accessed using a
single 64 byte bus transaction, therefore this portal
memory should be configured as cache-enabled. Also because
the write allocate stash transactions of QBMan should be
issued as cacheable and non-coherent (non-shareable), we
need to configure this region to be non-shareable.

Signed-off-by: Haiying Wang <haiying.w...@nxp.com>
---
 drivers/staging/fsl-mc/bus/dpio/dpio-driver.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/fsl-mc/bus/dpio/dpio-driver.c 
b/drivers/staging/fsl-mc/bus/dpio/dpio-driver.c
index e36da20..97f909c 100644
--- a/drivers/staging/fsl-mc/bus/dpio/dpio-driver.c
+++ b/drivers/staging/fsl-mc/bus/dpio/dpio-driver.c
@@ -168,10 +168,10 @@ static int dpaa2_dpio_probe(struct fsl_mc_device 
*dpio_dev)
desc.cpu = next_cpu;
 
/*
-* Set the CENA regs to be the cache inhibited area of the portal to
-* avoid coherency issues if a user migrates to another core.
+* Set the CENA regs to be the cache enabled area of the portal to
+* achieve the best performance.
 */
-   desc.regs_cena = ioremap_wc(dpio_dev->regions[1].start,
+   desc.regs_cena = ioremap_cache_ns(dpio_dev->regions[1].start,
resource_size(&dpio_dev->regions[1]));
desc.regs_cinh = ioremap(dpio_dev->regions[1].start,
resource_size(&dpio_dev->regions[1]));
-- 
2.7.4



[PATCH 3/3] bus: fsl-mc: dpio: change CENA regs to be cacheable

2017-04-20 Thread Haiying Wang
plus non-shareable to meet the performance requirement.
QMan's CENA region contains registers and structures that
are 64 bytes in size and are intended to be accessed using a
single 64 byte bus transaction, therefore this portal
memory should be configured as cache-enabled. Also because
the write allocate stash transactions of QBMan should be
issued as cacheable and non-coherent (non-shareable), we
need to configure this region to be non-shareable.

Signed-off-by: Haiying Wang 
---
 drivers/staging/fsl-mc/bus/dpio/dpio-driver.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/fsl-mc/bus/dpio/dpio-driver.c 
b/drivers/staging/fsl-mc/bus/dpio/dpio-driver.c
index e36da20..97f909c 100644
--- a/drivers/staging/fsl-mc/bus/dpio/dpio-driver.c
+++ b/drivers/staging/fsl-mc/bus/dpio/dpio-driver.c
@@ -168,10 +168,10 @@ static int dpaa2_dpio_probe(struct fsl_mc_device 
*dpio_dev)
desc.cpu = next_cpu;
 
/*
-* Set the CENA regs to be the cache inhibited area of the portal to
-* avoid coherency issues if a user migrates to another core.
+* Set the CENA regs to be the cache enabled area of the portal to
+* achieve the best performance.
 */
-   desc.regs_cena = ioremap_wc(dpio_dev->regions[1].start,
+   desc.regs_cena = ioremap_cache_ns(dpio_dev->regions[1].start,
resource_size(&dpio_dev->regions[1]));
desc.regs_cinh = ioremap(dpio_dev->regions[1].start,
resource_size(&dpio_dev->regions[1]));
-- 
2.7.4



[PATCH 0/3] bus: fsl-mc: dpio: update QMan CENA region

2017-04-20 Thread Haiying Wang
This patchset allows the NXP's DPAA2 QMan Software portal
CENA region to be cacheable as designed for the performance
goal. Besides, the write allocate stash feature of the QMan
requires the non-shareable attribute for this cache-enabled
memory.
So this patchset extends the arm64 ioremap with cache-enable
and non-shareable first, then enables the CENA portal memory
access in the QBMan driver, at last changes the ioremap call
in dpio driver where the software portal CENA memory is
mapped to be the correct one.

Haiying Wang (3):
  arm64: extend ioremap for cacheable non-shareable memory
  bus: fsl-mc: dpio: enable qbman CENA portal memory access
  bus: fsl-mc: dpio: change CENA regs to be cacheable

 arch/arm64/include/asm/io.h|  1 +
 arch/arm64/include/asm/pgtable-prot.h  |  1 +
 drivers/staging/fsl-mc/bus/dpio/dpio-driver.c  |  6 +++---
 drivers/staging/fsl-mc/bus/dpio/qbman-portal.c | 23 +--
 4 files changed, 22 insertions(+), 9 deletions(-)

-- 
2.7.4



[PATCH 0/3] bus: fsl-mc: dpio: update QMan CENA region

2017-04-20 Thread Haiying Wang
This patchset allows the NXP's DPAA2 QMan Software portal
CENA region to be cacheable as designed for the performance
goal. Besides, the write allocate stash feature of the QMan
requires the non-shareable attribute for this cache-enabled
memory.
So this patchset extends the arm64 ioremap with cache-enable
and non-shareable first, then enables the CENA portal memory
access in the QBMan driver, at last changes the ioremap call
in dpio driver where the software portal CENA memory is
mapped to be the correct one.

Haiying Wang (3):
  arm64: extend ioremap for cacheable non-shareable memory
  bus: fsl-mc: dpio: enable qbman CENA portal memory access
  bus: fsl-mc: dpio: change CENA regs to be cacheable

 arch/arm64/include/asm/io.h|  1 +
 arch/arm64/include/asm/pgtable-prot.h  |  1 +
 drivers/staging/fsl-mc/bus/dpio/dpio-driver.c  |  6 +++---
 drivers/staging/fsl-mc/bus/dpio/qbman-portal.c | 23 +--
 4 files changed, 22 insertions(+), 9 deletions(-)

-- 
2.7.4