[PATCH v3 01/17] crypto: talitos - Use zero entry to init descriptors ptrs to zero

2015-04-17 Thread Christophe Leroy
Use the zero_entry value to initialize the descriptor pointers to zero instead
of writing 0 to each field individually.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 857414a..7bf1b2b 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1373,9 +1373,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
int sg_count, ret;
 
/* first DWORD empty */
-   desc->ptr[0].len = 0;
-   to_talitos_ptr(&desc->ptr[0], 0);
-   desc->ptr[0].j_extent = 0;
+   desc->ptr[0] = zero_entry;
 
/* cipher iv */
to_talitos_ptr(&desc->ptr[1], edesc->iv_dma);
@@ -1445,9 +1443,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
   DMA_FROM_DEVICE);
 
/* last DWORD empty */
-   desc->ptr[6].len = 0;
-   to_talitos_ptr(&desc->ptr[6], 0);
-   desc->ptr[6].j_extent = 0;
+   desc->ptr[6] = zero_entry;
 
ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
if (ret != -EINPROGRESS) {
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 00/17] crypto: talitos - Add support for SEC1

2015-04-17 Thread Christophe Leroy
The purpose of this patch set is to add support to the talitos crypto driver
for the SEC1 version of the security engine, which is
found in mpc885 and mpc8272 processors.

v3 is a complete rework of the patchset. Since a kernel can be built
with support for both MPC82xx and MPC83xx at the same time, talitos
driver shall support both SEC1 and SEC2+ at the same time.

Based on cryptodev-2.6 tree

Christophe Leroy (17):
  crypto: talitos - Use zero entry to init descriptors ptrs to zero
  crypto: talitos - Refactor the sg in/out chain allocation
  crypto: talitos - talitos_ptr renamed ptr for more lisibility
  crypto: talitos - Add a helper function to clear j_extent field
  crypto: talitos - remove param 'extent' in map_single_talitos_ptr()
  crypto: talitos - helper function for ptr len
  crypto: talitos - enhanced talitos_desc struct for SEC1
  crypto: talitos - add sub-choice in talitos CONFIG for SEC1
  crypto: talitos - Add a feature to tag SEC1
  crypto: talitos - fill in talitos descriptor iaw SEC1 or SEC2+
  crypto: talitos - adaptation of talitos_submit() for SEC1
  crypto: talitos - base address for Execution Units
  crypto: talitos - adapt interrupts and reset functions to SEC1
  crypto: talitos - implement scatter/gather copy for SEC1
  crypto: talitos - SEC1 bugs on 0 data hash
  crypto: talitos - Add fsl,sec1.0 compatible
  crypto: talitos - Update DT bindings with SEC1

 .../devicetree/bindings/crypto/fsl-sec2.txt|   6 +-
 drivers/crypto/Kconfig |  18 +
 drivers/crypto/talitos.c   | 727 +++--
 drivers/crypto/talitos.h   | 153 +++--
 4 files changed, 644 insertions(+), 260 deletions(-)

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 03/17] crypto: talitos - talitos_ptr renamed ptr for more lisibility

2015-04-17 Thread Christophe Leroy
Linux CodingStyle recommends using short names for local
variables. 'ptr' is good enough for these 3-line functions.
It helps keep single lines shorter than 80 characters.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 5a7e345..fca0aed 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -55,37 +55,37 @@
 
 #include "talitos.h"
 
-static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t 
dma_addr)
+static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr)
 {
-   talitos_ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr));
-   talitos_ptr->eptr = upper_32_bits(dma_addr);
+   ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr));
+   ptr->eptr = upper_32_bits(dma_addr);
 }
 
 /*
  * map virtual single (contiguous) pointer to h/w descriptor pointer
  */
 static void map_single_talitos_ptr(struct device *dev,
-  struct talitos_ptr *talitos_ptr,
+  struct talitos_ptr *ptr,
   unsigned short len, void *data,
   unsigned char extent,
   enum dma_data_direction dir)
 {
dma_addr_t dma_addr = dma_map_single(dev, data, len, dir);
 
-   talitos_ptr->len = cpu_to_be16(len);
-   to_talitos_ptr(talitos_ptr, dma_addr);
-   talitos_ptr->j_extent = extent;
+   ptr->len = cpu_to_be16(len);
+   to_talitos_ptr(ptr, dma_addr);
+   ptr->j_extent = extent;
 }
 
 /*
  * unmap bus single (contiguous) h/w descriptor pointer
  */
 static void unmap_single_talitos_ptr(struct device *dev,
-struct talitos_ptr *talitos_ptr,
+struct talitos_ptr *ptr,
 enum dma_data_direction dir)
 {
-   dma_unmap_single(dev, be32_to_cpu(talitos_ptr->ptr),
-be16_to_cpu(talitos_ptr->len), dir);
+   dma_unmap_single(dev, be32_to_cpu(ptr->ptr),
+be16_to_cpu(ptr->len), dir);
 }
 
 static int reset_channel(struct device *dev, int ch)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 02/17] crypto: talitos - Refactor the sg in/out chain allocation

2015-04-17 Thread Christophe Leroy
This patch refactors the handling of the input and output data, which is quite
similar in several functions.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 159 ---
 1 file changed, 81 insertions(+), 78 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 7bf1b2b..5a7e345 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1327,16 +1327,23 @@ static int ablkcipher_setkey(struct crypto_ablkcipher 
*cipher,
return 0;
 }
 
+static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src,
+struct scatterlist *dst, unsigned int len,
+struct talitos_edesc *edesc)
+{
+   talitos_sg_unmap(dev, edesc, src, dst);
+}
+
 static void common_nonsnoop_unmap(struct device *dev,
  struct talitos_edesc *edesc,
  struct ablkcipher_request *areq)
 {
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
+
+   unmap_sg_talitos_ptr(dev, areq->src, areq->dst, areq->nbytes, edesc);
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE);
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE);
 
-   talitos_sg_unmap(dev, edesc, areq->src, areq->dst);
-
if (edesc->dma_len)
dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
 DMA_BIDIRECTIONAL);
@@ -1358,6 +1365,65 @@ static void ablkcipher_done(struct device *dev,
areq->base.complete(&areq->base, err);
 }
 
+int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src,
+ unsigned int len, struct talitos_edesc *edesc,
+ enum dma_data_direction dir, struct talitos_ptr *ptr)
+{
+   int sg_count;
+
+   ptr->len = cpu_to_be16(len);
+   ptr->j_extent = 0;
+
+   sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir,
+ edesc->src_chained);
+
+   if (sg_count == 1) {
+   to_talitos_ptr(ptr, sg_dma_address(src));
+   } else {
+   sg_count = sg_to_link_tbl(src, sg_count, len,
+ &edesc->link_tbl[0]);
+   if (sg_count > 1) {
+   to_talitos_ptr(ptr, edesc->dma_link_tbl);
+   ptr->j_extent |= DESC_PTR_LNKTBL_JUMP;
+   dma_sync_single_for_device(dev, edesc->dma_link_tbl,
+  edesc->dma_len,
+  DMA_BIDIRECTIONAL);
+   } else {
+   /* Only one segment now, so no link tbl needed */
+   to_talitos_ptr(ptr, sg_dma_address(src));
+   }
+   }
+   return sg_count;
+}
+
+void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst,
+   unsigned int len, struct talitos_edesc *edesc,
+   enum dma_data_direction dir,
+   struct talitos_ptr *ptr, int sg_count)
+{
+   ptr->len = cpu_to_be16(len);
+   ptr->j_extent = 0;
+
+   if (dir != DMA_NONE)
+   sg_count = talitos_map_sg(dev, dst, edesc->dst_nents ? : 1,
+ dir, edesc->dst_chained);
+
+   if (sg_count == 1) {
+   to_talitos_ptr(ptr, sg_dma_address(dst));
+   } else {
+   struct talitos_ptr *link_tbl_ptr =
+   &edesc->link_tbl[edesc->src_nents + 1];
+
+   to_talitos_ptr(ptr, edesc->dma_link_tbl +
+ (edesc->src_nents + 1) *
+ sizeof(struct talitos_ptr));
+   ptr->j_extent |= DESC_PTR_LNKTBL_JUMP;
+   sg_count = sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr);
+   dma_sync_single_for_device(dev, edesc->dma_link_tbl,
+  edesc->dma_len, DMA_BIDIRECTIONAL);
+   }
+}
+
 static int common_nonsnoop(struct talitos_edesc *edesc,
   struct ablkcipher_request *areq,
   void (*callback) (struct device *dev,
@@ -1387,56 +1453,16 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
/*
 * cipher in
 */
-   desc->ptr[3].len = cpu_to_be16(cryptlen);
-   desc->ptr[3].j_extent = 0;
-
-   sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1,
- (areq->src == areq->dst) ? DMA_BIDIRECTIONAL
-  : DMA_TO_DEVICE,
- edesc->src_chained);

[PATCH v3 06/17] crypto: talitos - helper function for ptr len

2015-04-17 Thread Christophe Leroy
This patch adds helper functions for reading and writing the len
field of the talitos descriptor pointer. This will help implement
SEC1 support later.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 24 +---
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 81e5636..bca6ded 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -61,6 +61,16 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, 
dma_addr_t dma_addr)
ptr->eptr = upper_32_bits(dma_addr);
 }
 
+static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned short len)
+{
+   ptr->len = cpu_to_be16(len);
+}
+
+static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr)
+{
+   return be16_to_cpu(ptr->len);
+}
+
 static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr)
 {
ptr->j_extent = 0;
@@ -76,7 +86,7 @@ static void map_single_talitos_ptr(struct device *dev,
 {
dma_addr_t dma_addr = dma_map_single(dev, data, len, dir);
 
-   ptr->len = cpu_to_be16(len);
+   to_talitos_ptr_len(ptr, len);
to_talitos_ptr(ptr, dma_addr);
to_talitos_ptr_extent_clear(ptr);
 }
@@ -89,7 +99,7 @@ static void unmap_single_talitos_ptr(struct device *dev,
 enum dma_data_direction dir)
 {
dma_unmap_single(dev, be32_to_cpu(ptr->ptr),
-be16_to_cpu(ptr->len), dir);
+from_talitos_ptr_len(ptr), dir);
 }
 
 static int reset_channel(struct device *dev, int ch)
@@ -1375,7 +1385,7 @@ int map_sg_in_talitos_ptr(struct device *dev, struct 
scatterlist *src,
 {
int sg_count;
 
-   ptr->len = cpu_to_be16(len);
+   to_talitos_ptr_len(ptr, len);
to_talitos_ptr_extent_clear(ptr);
 
sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir,
@@ -1405,7 +1415,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct 
scatterlist *dst,
enum dma_data_direction dir,
struct talitos_ptr *ptr, int sg_count)
 {
-   ptr->len = cpu_to_be16(len);
+   to_talitos_ptr_len(ptr, len);
to_talitos_ptr_extent_clear(ptr);
 
if (dir != DMA_NONE)
@@ -1447,7 +1457,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
 
/* cipher iv */
to_talitos_ptr(&desc->ptr[1], edesc->iv_dma);
-   desc->ptr[1].len = cpu_to_be16(ivsize);
+   to_talitos_ptr_len(&desc->ptr[1], ivsize);
to_talitos_ptr_extent_clear(&desc->ptr[1]);
 
/* cipher key */
@@ -1539,11 +1549,11 @@ static void common_nonsnoop_hash_unmap(struct device 
*dev,
unmap_sg_talitos_ptr(dev, req_ctx->psrc, NULL, 0, edesc);
 
/* When using hashctx-in, must unmap it. */
-   if (edesc->desc.ptr[1].len)
+   if (from_talitos_ptr_len(&edesc->desc.ptr[1]))
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1],
 DMA_TO_DEVICE);
 
-   if (edesc->desc.ptr[2].len)
+   if (from_talitos_ptr_len(&edesc->desc.ptr[2]))
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2],
 DMA_TO_DEVICE);
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 05/17] crypto: talitos - remove param 'extent' in map_single_talitos_ptr()

2015-04-17 Thread Christophe Leroy
map_single_talitos_ptr() is always called with extent == 0, so let's remove this
unused parameter.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 21 ++---
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index c93f79b..81e5636 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -72,14 +72,13 @@ static void to_talitos_ptr_extent_clear(struct talitos_ptr 
*ptr)
 static void map_single_talitos_ptr(struct device *dev,
   struct talitos_ptr *ptr,
   unsigned short len, void *data,
-  unsigned char extent,
   enum dma_data_direction dir)
 {
dma_addr_t dma_addr = dma_map_single(dev, data, len, dir);
 
ptr->len = cpu_to_be16(len);
to_talitos_ptr(ptr, dma_addr);
-   ptr->j_extent = extent;
+   to_talitos_ptr_extent_clear(ptr);
 }
 
 /*
@@ -958,7 +957,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
 
/* hmac key */
map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key,
-  0, DMA_TO_DEVICE);
+  DMA_TO_DEVICE);
 
/* hmac data */
desc->ptr[1].len = cpu_to_be16(areq->assoclen + ivsize);
@@ -1002,7 +1001,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
 
/* cipher key */
map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen,
-  (char *)&ctx->key + ctx->authkeylen, 0,
+  (char *)&ctx->key + ctx->authkeylen,
   DMA_TO_DEVICE);
 
/*
@@ -1080,7 +1079,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
}
 
/* iv out */
-   map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, 0,
+   map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv,
   DMA_FROM_DEVICE);
 
ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
@@ -1453,7 +1452,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
 
/* cipher key */
map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen,
-  (char *)&ctx->key, 0, DMA_TO_DEVICE);
+  (char *)&ctx->key, DMA_TO_DEVICE);
 
/*
 * cipher in
@@ -1470,7 +1469,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
   &desc->ptr[4], sg_count);
 
/* iv out */
-   map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, 0,
+   map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv,
   DMA_FROM_DEVICE);
 
/* last DWORD empty */
@@ -1595,7 +1594,7 @@ static int common_nonsnoop_hash(struct talitos_edesc 
*edesc,
if (!req_ctx->first || req_ctx->swinit) {
map_single_talitos_ptr(dev, &desc->ptr[1],
   req_ctx->hw_context_size,
-  (char *)req_ctx->hw_context, 0,
+  (char *)req_ctx->hw_context,
   DMA_TO_DEVICE);
req_ctx->swinit = 0;
} else {
@@ -1607,7 +1606,7 @@ static int common_nonsnoop_hash(struct talitos_edesc 
*edesc,
/* HMAC key */
if (ctx->keylen)
map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen,
-  (char *)&ctx->key, 0, DMA_TO_DEVICE);
+  (char *)&ctx->key, DMA_TO_DEVICE);
else
desc->ptr[2] = zero_entry;
 
@@ -1624,11 +1623,11 @@ static int common_nonsnoop_hash(struct talitos_edesc 
*edesc,
if (req_ctx->last)
map_single_talitos_ptr(dev, &desc->ptr[5],
   crypto_ahash_digestsize(tfm),
-  areq->result, 0, DMA_FROM_DEVICE);
+  areq->result, DMA_FROM_DEVICE);
else
map_single_talitos_ptr(dev, &desc->ptr[5],
   req_ctx->hw_context_size,
-  req_ctx->hw_context, 0, DMA_FROM_DEVICE);
+  req_ctx->hw_context, DMA_FROM_DEVICE);
 
/* last DWORD empty */
desc->ptr[6] = zero_entry;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 04/17] crypto: talitos - Add a helper function to clear j_extent field

2015-04-17 Thread Christophe Leroy
The j_extent field is specific to SEC2, so we add a helper function to clear it
so that SEC1 can redefine that function as a no-op.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index fca0aed..c93f79b 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -61,6 +61,11 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, 
dma_addr_t dma_addr)
ptr->eptr = upper_32_bits(dma_addr);
 }
 
+static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr)
+{
+   ptr->j_extent = 0;
+}
+
 /*
  * map virtual single (contiguous) pointer to h/w descriptor pointer
  */
@@ -1372,7 +1377,7 @@ int map_sg_in_talitos_ptr(struct device *dev, struct 
scatterlist *src,
int sg_count;
 
ptr->len = cpu_to_be16(len);
-   ptr->j_extent = 0;
+   to_talitos_ptr_extent_clear(ptr);
 
sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir,
  edesc->src_chained);
@@ -1402,7 +1407,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct 
scatterlist *dst,
struct talitos_ptr *ptr, int sg_count)
 {
ptr->len = cpu_to_be16(len);
-   ptr->j_extent = 0;
+   to_talitos_ptr_extent_clear(ptr);
 
if (dir != DMA_NONE)
sg_count = talitos_map_sg(dev, dst, edesc->dst_nents ? : 1,
@@ -1444,7 +1449,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
/* cipher iv */
to_talitos_ptr(&desc->ptr[1], edesc->iv_dma);
desc->ptr[1].len = cpu_to_be16(ivsize);
-   desc->ptr[1].j_extent = 0;
+   to_talitos_ptr_extent_clear(&desc->ptr[1]);
 
/* cipher key */
map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen,
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 07/17] crypto: talitos - enhanced talitos_desc struct for SEC1

2015-04-17 Thread Christophe Leroy
This patch enhances the talitos_desc struct with fields for SEC1.
SEC1 has only one header field, and has a 'next_desc' field in
addition.
This mixed descriptor will continue to fit SEC2, and for SEC1
we will copy the hdr value into hdr1 in talitos_submit().

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.h | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index 61a1405..f078da1 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -37,9 +37,17 @@
 
 /* descriptor pointer entry */
 struct talitos_ptr {
-   __be16 len; /* length */
-   u8 j_extent;/* jump to sg link table and/or extent */
-   u8 eptr;/* extended address */
+   union {
+   struct {/* SEC2 format */
+   __be16 len; /* length */
+   u8 j_extent;/* jump to sg link table and/or extent*/
+   u8 eptr;/* extended address */
+   };
+   struct {/* SEC1 format */
+   __be16 res;
+   __be16 len1;/* length */
+   };
+   };
__be32 ptr; /* address */
 };
 
@@ -53,8 +61,12 @@ static const struct talitos_ptr zero_entry = {
 /* descriptor */
 struct talitos_desc {
__be32 hdr; /* header high bits */
-   __be32 hdr_lo;  /* header low bits */
+   union {
+   __be32 hdr_lo;  /* header low bits */
+   __be32 hdr1;/* header for SEC1 */
+   };
struct talitos_ptr ptr[7];  /* ptr/len pair array */
+   __be32 next_desc;   /* next descriptor (SEC1) */
 };
 
 /**
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 08/17] crypto: talitos - add sub-choice in talitos CONFIG for SEC1

2015-04-17 Thread Christophe Leroy
This patch adds a CONFIG option to select SEC1, SEC2+ or both.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/Kconfig | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 800bf41..8a76a01 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -222,6 +222,24 @@ config CRYPTO_DEV_TALITOS
  To compile this driver as a module, choose M here: the module
  will be called talitos.
 
+config CRYPTO_DEV_TALITOS1
+   bool "SEC1 (SEC 1.0 and SEC Lite 1.2)"
+   depends on CRYPTO_DEV_TALITOS
+   depends on PPC_8xx || PPC_82xx
+   default y
+   help
+ Say 'Y' here to use the Freescale Security Engine (SEC) version 1.0
+ found on MPC82xx or the Freescale Security Engine (SEC Lite)
+ version 1.2 found on MPC8xx
+
+config CRYPTO_DEV_TALITOS2
+   bool "SEC2+ (SEC version 2.0 or upper)"
+   depends on CRYPTO_DEV_TALITOS
+   default y if !PPC_8xx
+   help
+ Say 'Y' here to use the Freescale Security Engine (SEC)
+ version 2 and following as found on MPC83xx, MPC85xx, etc ...
+
 config CRYPTO_DEV_IXP4XX
tristate "Driver for IXP4xx crypto hardware acceleration"
depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 09/17] crypto: talitos - Add a feature to tag SEC1

2015-04-17 Thread Christophe Leroy
We add a new feature flag in the features field to mark devices compatible
with "fsl,sec1.0".
We also define a helper function called has_ftr_sec1() to help
functions quickly determine if they are running on SEC1 or SEC2+.
When only SEC1 or SEC2 is compiled in, has_ftr_sec1() returns
the corresponding constant value. If both are compiled in, the features
field is checked.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c |  3 +++
 drivers/crypto/talitos.h | 17 +
 2 files changed, 20 insertions(+)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index bca6ded..db95023 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -2709,6 +2709,9 @@ static int talitos_probe(struct platform_device *ofdev)
  TALITOS_FTR_SHA224_HWINIT |
  TALITOS_FTR_HMAC_OK;
 
+   if (of_device_is_compatible(np, "fsl,sec1.0"))
+   priv->features |= TALITOS_FTR_SEC1;
+
priv->chan = kzalloc(sizeof(struct talitos_channel) *
 priv->num_channels, GFP_KERNEL);
if (!priv->chan) {
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index f078da1..b0bdb4e 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -156,6 +156,23 @@ extern int talitos_submit(struct device *dev, int ch, 
struct talitos_desc *desc,
 #define TALITOS_FTR_HW_AUTH_CHECK 0x0002
 #define TALITOS_FTR_SHA224_HWINIT 0x0004
 #define TALITOS_FTR_HMAC_OK 0x0008
+#define TALITOS_FTR_SEC1 0x0010
+
+/*
+ * If both CONFIG_CRYPTO_DEV_TALITOS1 and CONFIG_CRYPTO_DEV_TALITOS2 are
+ * defined, we check the features which are set according to the device tree.
+ * Otherwise, we answer true or false directly
+ */
+static inline bool has_ftr_sec1(struct talitos_private *priv)
+{
+#if defined(CONFIG_CRYPTO_DEV_TALITOS1) && defined(CONFIG_CRYPTO_DEV_TALITOS2)
+   return priv->features & TALITOS_FTR_SEC1 ? true : false;
+#elif defined(CONFIG_CRYPTO_DEV_TALITOS1)
+   return true;
+#else
+   return false;
+#endif
+}
 
 /*
  * TALITOS_xxx_LO addresses point to the low data bits (32-63) of the register
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 10/17] crypto: talitos - fill in talitos descriptor iaw SEC1 or SEC2+

2015-04-17 Thread Christophe Leroy
The talitos descriptor is slightly different for SEC1 and SEC2+, so
let the helper functions that fill the descriptor take into account
the type of SEC.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 105 ++-
 1 file changed, 67 insertions(+), 38 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index db95023..678b528 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -55,25 +55,38 @@
 
 #include "talitos.h"
 
-static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr)
+static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr,
+  bool is_sec1)
 {
ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr));
-   ptr->eptr = upper_32_bits(dma_addr);
+   if (!is_sec1)
+   ptr->eptr = upper_32_bits(dma_addr);
 }
 
-static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned short len)
+static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned short len,
+  bool is_sec1)
 {
-   ptr->len = cpu_to_be16(len);
+   if (is_sec1) {
+   ptr->res = 0;
+   ptr->len1 = cpu_to_be16(len);
+   } else {
+   ptr->len = cpu_to_be16(len);
+   }
 }
 
-static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr)
+static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr,
+  bool is_sec1)
 {
-   return be16_to_cpu(ptr->len);
+   if (is_sec1)
+   return be16_to_cpu(ptr->len1);
+   else
+   return be16_to_cpu(ptr->len);
 }
 
-static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr)
+static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr, bool is_sec1)
 {
-   ptr->j_extent = 0;
+   if (!is_sec1)
+   ptr->j_extent = 0;
 }
 
 /*
@@ -85,10 +98,12 @@ static void map_single_talitos_ptr(struct device *dev,
   enum dma_data_direction dir)
 {
dma_addr_t dma_addr = dma_map_single(dev, data, len, dir);
+   struct talitos_private *priv = dev_get_drvdata(dev);
+   bool is_sec1 = has_ftr_sec1(priv);
 
-   to_talitos_ptr_len(ptr, len);
-   to_talitos_ptr(ptr, dma_addr);
-   to_talitos_ptr_extent_clear(ptr);
+   to_talitos_ptr_len(ptr, len, is_sec1);
+   to_talitos_ptr(ptr, dma_addr, is_sec1);
+   to_talitos_ptr_extent_clear(ptr, is_sec1);
 }
 
 /*
@@ -98,8 +113,11 @@ static void unmap_single_talitos_ptr(struct device *dev,
 struct talitos_ptr *ptr,
 enum dma_data_direction dir)
 {
+   struct talitos_private *priv = dev_get_drvdata(dev);
+   bool is_sec1 = has_ftr_sec1(priv);
+
dma_unmap_single(dev, be32_to_cpu(ptr->ptr),
-from_talitos_ptr_len(ptr), dir);
+from_talitos_ptr_len(ptr, is_sec1), dir);
 }
 
 static int reset_channel(struct device *dev, int ch)
@@ -922,7 +940,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int 
sg_count,
int n_sg = sg_count;
 
while (n_sg--) {
-   to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg));
+   to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg), 0);
link_tbl_ptr->len = cpu_to_be16(sg_dma_len(sg));
link_tbl_ptr->j_extent = 0;
link_tbl_ptr++;
@@ -976,7 +994,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off];
 
to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off *
-  sizeof(struct talitos_ptr));
+  sizeof(struct talitos_ptr), 0);
desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP;
 
/* assoc_nents - 1 entries for assoc, 1 for IV */
@@ -987,7 +1005,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
tbl_ptr += sg_count - 1;
tbl_ptr->j_extent = 0;
tbl_ptr++;
-   to_talitos_ptr(tbl_ptr, edesc->iv_dma);
+   to_talitos_ptr(tbl_ptr, edesc->iv_dma, 0);
tbl_ptr->len = cpu_to_be16(ivsize);
tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN;
 
@@ -996,14 +1014,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
} else {
if (areq->assoclen)
to_talitos_ptr(&desc->ptr[1],
-  sg_dma_address(areq->assoc));
+  sg_dma_address(areq->assoc), 0);
else
-   to_talitos_ptr(&desc->ptr[1], edesc->iv_dma)

[PATCH v3 11/17] crypto: talitos - adaptation of talitos_submit() for SEC1

2015-04-17 Thread Christophe Leroy
The SEC1 descriptor is a bit different from the SEC2+ descriptor.
talitos_submit() has to copy the hdr field into the hdr1 field and
send the descriptor starting at hdr1 up to next_desc.
For SEC2, it remains unchanged and next_desc is just ignored.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 23 +++
 drivers/crypto/talitos.h |  2 ++
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 678b528..e6ea651 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -236,6 +236,7 @@ int talitos_submit(struct device *dev, int ch, struct 
talitos_desc *desc,
struct talitos_request *request;
unsigned long flags;
int head;
+   bool is_sec1 = has_ftr_sec1(priv);
 
spin_lock_irqsave(&priv->chan[ch].head_lock, flags);
 
@@ -249,8 +250,17 @@ int talitos_submit(struct device *dev, int ch, struct 
talitos_desc *desc,
request = &priv->chan[ch].fifo[head];
 
/* map descriptor and save caller data */
-   request->dma_desc = dma_map_single(dev, desc, sizeof(*desc),
-  DMA_BIDIRECTIONAL);
+   if (is_sec1) {
+   desc->hdr1 = desc->hdr;
+   desc->next_desc = 0;
+   request->dma_desc = dma_map_single(dev, &desc->hdr1,
+  TALITOS_DESC_SIZE,
+  DMA_BIDIRECTIONAL);
+   } else {
+   request->dma_desc = dma_map_single(dev, desc,
+  TALITOS_DESC_SIZE,
+  DMA_BIDIRECTIONAL);
+   }
request->callback = callback;
request->context = context;
 
@@ -282,16 +292,21 @@ static void flush_channel(struct device *dev, int ch, int 
error, int reset_ch)
struct talitos_request *request, saved_req;
unsigned long flags;
int tail, status;
+   bool is_sec1 = has_ftr_sec1(priv);
 
spin_lock_irqsave(&priv->chan[ch].tail_lock, flags);
 
tail = priv->chan[ch].tail;
while (priv->chan[ch].fifo[tail].desc) {
+   __be32 hdr;
+
request = &priv->chan[ch].fifo[tail];
 
/* descriptors with their done bits set don't get the error */
rmb();
-   if ((request->desc->hdr & DESC_HDR_DONE) == DESC_HDR_DONE)
+   hdr = is_sec1 ? request->desc->hdr1 : request->desc->hdr;
+
+   if ((hdr & DESC_HDR_DONE) == DESC_HDR_DONE)
status = 0;
else
if (!error)
@@ -300,7 +315,7 @@ static void flush_channel(struct device *dev, int ch, int 
error, int reset_ch)
status = error;
 
dma_unmap_single(dev, request->dma_desc,
-sizeof(struct talitos_desc),
+TALITOS_DESC_SIZE,
 DMA_BIDIRECTIONAL);
 
/* copy entries so we can call callback outside lock */
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index b0bdb4e..f827c04 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -69,6 +69,8 @@ struct talitos_desc {
__be32 next_desc;   /* next descriptor (SEC1) */
 };
 
+#define TALITOS_DESC_SIZE  (sizeof(struct talitos_desc) - sizeof(__be32))
+
 /**
  * talitos_request - descriptor submission request
  * @desc: descriptor pointer (kernel virtual)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 12/17] crypto: talitos - base address for Execution Units

2015-04-17 Thread Christophe Leroy
SEC 1.0, 1.2 and 2.x+ have different EU base addresses, so we need to
define pointers for each EU in the driver private data structure.
The proper address is set by the probe function depending on the
SEC type, in order to provide access to the proper address.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 83 
 drivers/crypto/talitos.h | 72 +
 2 files changed, 100 insertions(+), 55 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index e6ea651..6d77699 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -208,7 +208,7 @@ static int init_device(struct device *dev)
 
/* disable integrity check error interrupts (use writeback instead) */
if (priv->features & TALITOS_FTR_HW_AUTH_CHECK)
-   setbits32(priv->reg + TALITOS_MDEUICR_LO,
+   setbits32(priv->reg_mdeu + TALITOS_EUICR_LO,
  TALITOS_MDEUICR_LO_ICE);
 
return 0;
@@ -424,44 +424,44 @@ static void report_eu_error(struct device *dev, int ch, 
u32 desc_hdr)
switch (desc_hdr & DESC_HDR_SEL0_MASK) {
case DESC_HDR_SEL0_AFEU:
dev_err(dev, "AFEUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_AFEUISR),
-   in_be32(priv->reg + TALITOS_AFEUISR_LO));
+   in_be32(priv->reg_afeu + TALITOS_EUISR),
+   in_be32(priv->reg_afeu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_DEU:
dev_err(dev, "DEUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_DEUISR),
-   in_be32(priv->reg + TALITOS_DEUISR_LO));
+   in_be32(priv->reg_deu + TALITOS_EUISR),
+   in_be32(priv->reg_deu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_MDEUA:
case DESC_HDR_SEL0_MDEUB:
dev_err(dev, "MDEUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_MDEUISR),
-   in_be32(priv->reg + TALITOS_MDEUISR_LO));
+   in_be32(priv->reg_mdeu + TALITOS_EUISR),
+   in_be32(priv->reg_mdeu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_RNG:
dev_err(dev, "RNGUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_RNGUISR),
-   in_be32(priv->reg + TALITOS_RNGUISR_LO));
+   in_be32(priv->reg_rngu + TALITOS_ISR),
+   in_be32(priv->reg_rngu + TALITOS_ISR_LO));
break;
case DESC_HDR_SEL0_PKEU:
dev_err(dev, "PKEUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_PKEUISR),
-   in_be32(priv->reg + TALITOS_PKEUISR_LO));
+   in_be32(priv->reg_pkeu + TALITOS_EUISR),
+   in_be32(priv->reg_pkeu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_AESU:
dev_err(dev, "AESUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_AESUISR),
-   in_be32(priv->reg + TALITOS_AESUISR_LO));
+   in_be32(priv->reg_aesu + TALITOS_EUISR),
+   in_be32(priv->reg_aesu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_CRCU:
dev_err(dev, "CRCUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_CRCUISR),
-   in_be32(priv->reg + TALITOS_CRCUISR_LO));
+   in_be32(priv->reg_crcu + TALITOS_EUISR),
+   in_be32(priv->reg_crcu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_KEU:
dev_err(dev, "KEUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_KEUISR),
-   in_be32(priv->reg + TALITOS_KEUISR_LO));
+   in_be32(priv->reg_pkeu + TALITOS_EUISR),
+   in_be32(priv->reg_pkeu + TALITOS_EUISR_LO));
break;
}
 
@@ -469,13 +469,13 @@ static void report_eu_error(struct device *dev, int ch, 
u32 desc_hdr)
case DESC_HDR_SEL1_MDEUA:
case DESC_HDR_SEL1_MDEUB:
dev_err(dev, "MDEUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_MDEUISR),
-   in_be32(priv->reg + TALITOS_MDEUISR_LO));
+   in_be32(priv->reg_mdeu + TALITOS_EUISR),
+   in_be32(priv->reg_mdeu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL1_CRCU:
dev_err(dev, 

[PATCH v3 13/17] crypto: talitos - adapt interrupts and reset functions to SEC1

2015-04-17 Thread Christophe Leroy
This patch adapts the interrupt handling and reset functions for
SEC1. On SEC1, registers are mostly similar to SEC2+, but bits
are sometimes located at different places. So we need to define
TALITOS1 and TALITOS2 versions of some fields, and select between
them according to whether the device is SEC1 or SEC2.

On SEC1, only one interrupt vector is dedicated to the SEC, so only
interrupt_4ch is needed.

On SEC1, interrupts are enabled by clearing related bits in IMR,
while on SEC2, interrupts are enabled by setting the bits in IMR.

SEC1 also performs parity verification in the DES Unit. We have
to disable this feature because the test vectors provided in
the kernel have parity errors.

In reset functions, only SEC2 supports continuation after error.
For SEC1, we have to reset in all cases.

For error handling, SEC2+ names have been kept, but the displayed
text has been amended to reflect the exact meaning on SEC1.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 227 +++
 drivers/crypto/talitos.h |  39 +---
 2 files changed, 199 insertions(+), 67 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 6d77699..1265405 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -124,12 +124,23 @@ static int reset_channel(struct device *dev, int ch)
 {
struct talitos_private *priv = dev_get_drvdata(dev);
unsigned int timeout = TALITOS_TIMEOUT;
+   bool is_sec1 = has_ftr_sec1(priv);
 
-   setbits32(priv->chan[ch].reg + TALITOS_CCCR, TALITOS_CCCR_RESET);
+   if (is_sec1) {
+   setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO,
+ TALITOS1_CCCR_LO_RESET);
 
-   while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & TALITOS_CCCR_RESET)
-  && --timeout)
-   cpu_relax();
+   while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR_LO) &
+   TALITOS1_CCCR_LO_RESET) && --timeout)
+   cpu_relax();
+   } else {
+   setbits32(priv->chan[ch].reg + TALITOS_CCCR,
+ TALITOS2_CCCR_RESET);
+
+   while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) &
+   TALITOS2_CCCR_RESET) && --timeout)
+   cpu_relax();
+   }
 
if (timeout == 0) {
dev_err(dev, "failed to reset channel %d\n", ch);
@@ -152,11 +163,12 @@ static int reset_device(struct device *dev)
 {
struct talitos_private *priv = dev_get_drvdata(dev);
unsigned int timeout = TALITOS_TIMEOUT;
-   u32 mcr = TALITOS_MCR_SWR;
+   bool is_sec1 = has_ftr_sec1(priv);
+   u32 mcr = is_sec1 ? TALITOS1_MCR_SWR : TALITOS2_MCR_SWR;
 
setbits32(priv->reg + TALITOS_MCR, mcr);
 
-   while ((in_be32(priv->reg + TALITOS_MCR) & TALITOS_MCR_SWR)
+   while ((in_be32(priv->reg + TALITOS_MCR) & mcr)
   && --timeout)
cpu_relax();
 
@@ -180,6 +192,7 @@ static int init_device(struct device *dev)
 {
struct talitos_private *priv = dev_get_drvdata(dev);
int ch, err;
+   bool is_sec1 = has_ftr_sec1(priv);
 
/*
 * Master reset
@@ -203,8 +216,15 @@ static int init_device(struct device *dev)
}
 
/* enable channel done and error interrupts */
-   setbits32(priv->reg + TALITOS_IMR, TALITOS_IMR_INIT);
-   setbits32(priv->reg + TALITOS_IMR_LO, TALITOS_IMR_LO_INIT);
+   if (is_sec1) {
+   clrbits32(priv->reg + TALITOS_IMR, TALITOS1_IMR_INIT);
+   clrbits32(priv->reg + TALITOS_IMR_LO, TALITOS1_IMR_LO_INIT);
+   /* disable parity error check in DEU (erroneous? test vect.) */
+   setbits32(priv->reg_deu + TALITOS_EUICR, TALITOS1_DEUICR_KPE);
+   } else {
+   setbits32(priv->reg + TALITOS_IMR, TALITOS2_IMR_INIT);
+   setbits32(priv->reg + TALITOS_IMR_LO, TALITOS2_IMR_LO_INIT);
+   }
 
/* disable integrity check error interrupts (use writeback instead) */
if (priv->features & TALITOS_FTR_HW_AUTH_CHECK)
@@ -349,8 +369,37 @@ static void flush_channel(struct device *dev, int ch, int 
error, int reset_ch)
 /*
  * process completed requests for channels that have done status
  */
-#define DEF_TALITOS_DONE(name, ch_done_mask)   \
-static void talitos_done_##name(unsigned long data)\
+#define DEF_TALITOS1_DONE(name, ch_done_mask)  \
+static void talitos1_done_##name(unsigned long data)   \
+{  \
+   struct device *dev = (struct device *)data; \
+   struct talitos_private *priv = dev_get_drvdata(dev);\
+   unsigned long flags;

[PATCH v3 14/17] crypto: talitos - implement scatter/gather copy for SEC1

2015-04-17 Thread Christophe Leroy
SEC1 doesn't support scatter/gather: it doesn't handle link tables.
Therefore, for SEC1 we have to do it in software. For that, we reserve
space at the end of the extended descriptor, in lieu of the space
reserved for the link tables on SEC2, and we perform sg_copy() when
preparing the descriptors.

We also adapt the max buffer size which is only 32k on SEC1 while it
is 64k on SEC2+

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 138 ++-
 drivers/crypto/talitos.h |   3 +-
 2 files changed, 103 insertions(+), 38 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 1265405..dddf4b3 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -866,9 +866,10 @@ badkey:
  * @dst_chained: whether dst is chained or not
  * @iv_dma: dma address of iv for checking continuity and link table
  * @dma_len: length of dma mapped link_tbl space
- * @dma_link_tbl: bus physical address of link_tbl
+ * @dma_link_tbl: bus physical address of link_tbl/buf
  * @desc: h/w descriptor
- * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1)
+ * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) (SEC2)
+ * @buf: input and output buffeur (if {src,dst}_nents > 1) (SEC1)
  *
  * if decrypting (with authcheck), or either one of src_nents or dst_nents
  * is greater than 1, an integrity check value is concatenated to the end
@@ -885,7 +886,10 @@ struct talitos_edesc {
int dma_len;
dma_addr_t dma_link_tbl;
struct talitos_desc desc;
-   struct talitos_ptr link_tbl[0];
+   union {
+   struct talitos_ptr link_tbl[0];
+   u8 buf[0];
+   };
 };
 
 static int talitos_map_sg(struct device *dev, struct scatterlist *sg,
@@ -1282,8 +1286,11 @@ static struct talitos_edesc *talitos_edesc_alloc(struct 
device *dev,
dma_addr_t iv_dma = 0;
gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
  GFP_ATOMIC;
+   struct talitos_private *priv = dev_get_drvdata(dev);
+   bool is_sec1 = has_ftr_sec1(priv);
+   int max_len = is_sec1 ? TALITOS1_MAX_DATA_LEN : TALITOS2_MAX_DATA_LEN;
 
-   if (cryptlen + authsize > TALITOS_MAX_DATA_LEN) {
+   if (cryptlen + authsize > max_len) {
dev_err(dev, "length exceeds h/w max limit\n");
return ERR_PTR(-EINVAL);
}
@@ -1327,8 +1334,12 @@ static struct talitos_edesc *talitos_edesc_alloc(struct 
device *dev,
 */
alloc_len = sizeof(struct talitos_edesc);
if (assoc_nents || src_nents || dst_nents) {
-   dma_len = (src_nents + dst_nents + 2 + assoc_nents) *
- sizeof(struct talitos_ptr) + authsize;
+   if (is_sec1)
+   dma_len = src_nents ? cryptlen : 0 +
+ dst_nents ? cryptlen : 0;
+   else
+   dma_len = (src_nents + dst_nents + 2 + assoc_nents) *
+ sizeof(struct talitos_ptr) + authsize;
alloc_len += dma_len;
} else {
dma_len = 0;
@@ -1485,7 +1496,27 @@ static void unmap_sg_talitos_ptr(struct device *dev, 
struct scatterlist *src,
 struct scatterlist *dst, unsigned int len,
 struct talitos_edesc *edesc)
 {
-   talitos_sg_unmap(dev, edesc, src, dst);
+   struct talitos_private *priv = dev_get_drvdata(dev);
+   bool is_sec1 = has_ftr_sec1(priv);
+
+   if (is_sec1) {
+   if (!edesc->src_nents) {
+   dma_unmap_sg(dev, src, 1,
+dst != src ? DMA_TO_DEVICE
+   : DMA_BIDIRECTIONAL);
+   }
+   if (dst && edesc->dst_nents) {
+   dma_sync_single_for_device(dev,
+  edesc->dma_link_tbl + len,
+  len, DMA_FROM_DEVICE);
+   sg_copy_from_buffer(dst, edesc->dst_nents ? : 1,
+   edesc->buf + len, len);
+   } else if (dst && dst != src) {
+   dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE);
+   }
+   } else {
+   talitos_sg_unmap(dev, edesc, src, dst);
+   }
 }
 
 static void common_nonsnoop_unmap(struct device *dev,
@@ -1528,25 +1559,42 @@ int map_sg_in_talitos_ptr(struct device *dev, struct 
scatterlist *src,
bool is_sec1 = has_ftr_sec1(priv);
 
to_talitos_ptr_len(ptr, len, is_sec1);
-   to_talitos_ptr_extent_clear(ptr, is_sec1);
 
-   sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir,
- edesc->src_chained);
+   if (is_sec1) {
+ 

[PATCH v3 15/17] crypto: talitos - SEC1 bugs on 0 data hash

2015-04-17 Thread Christophe Leroy
SEC1 misbehaves when hashing a zero-length message, so we submit an
already padded block representing 0 bytes of data.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index dddf4b3..f1406d7b 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1797,6 +1797,27 @@ static void ahash_done(struct device *dev,
areq->base.complete(&areq->base, err);
 }
 
+/*
+ * SEC1 doesn't like hashing of 0 sized message, so we do the padding
+ * ourself and submit a padded block
+ */
+void talitos_handle_buggy_hash(struct talitos_ctx *ctx,
+  struct talitos_edesc *edesc,
+  struct talitos_ptr *ptr)
+{
+   static u8 padded_hash[64] = {
+   0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   };
+
+   pr_err_once("Bug in SEC1, padding ourself\n");
+   edesc->desc.hdr &= ~DESC_HDR_MODE0_MDEU_PAD;
+   map_single_talitos_ptr(ctx->dev, ptr, sizeof(padded_hash),
+  (char *)padded_hash, DMA_TO_DEVICE);
+}
+
 static int common_nonsnoop_hash(struct talitos_edesc *edesc,
struct ahash_request *areq, unsigned int length,
void (*callback) (struct device *dev,
@@ -1857,6 +1878,9 @@ static int common_nonsnoop_hash(struct talitos_edesc 
*edesc,
/* last DWORD empty */
desc->ptr[6] = zero_entry;
 
+   if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0)
+   talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]);
+
ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
if (ret != -EINPROGRESS) {
common_nonsnoop_hash_unmap(dev, edesc, areq);
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 16/17] crypto: talitos - Add fsl,sec1.0 compatible

2015-04-17 Thread Christophe Leroy
We add a specific compatible for SEC1, to handle the differences
between SEC1 and SEC2+

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index f1406d7b..c04074d 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -3086,9 +3086,16 @@ err_out:
 }
 
 static const struct of_device_id talitos_match[] = {
+#ifdef CONFIG_CRYPTO_DEV_TALITOS1
+   {
+   .compatible = "fsl,sec1.0",
+   },
+#endif
+#ifdef CONFIG_CRYPTO_DEV_TALITOS2
{
.compatible = "fsl,sec2.0",
},
+#endif
{},
 };
 MODULE_DEVICE_TABLE(of, talitos_match);
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 17/17] crypto: talitos - Update DT bindings with SEC1

2015-04-17 Thread Christophe Leroy
This patch updates the documentation by including SEC1 into SEC2/3 doc

Signed-off-by: Christophe Leroy 
---
 Documentation/devicetree/bindings/crypto/fsl-sec2.txt | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt 
b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
index 38988ef..f0d926b 100644
--- a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
+++ b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
@@ -1,9 +1,11 @@
-Freescale SoC SEC Security Engines versions 2.x-3.x
+Freescale SoC SEC Security Engines versions 1.x-2.x-3.x
 
 Required properties:
 
 - compatible : Should contain entries for this and backward compatible
-  SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0"
+  SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0" (SEC2/3)
+ e.g., "fsl,sec1.2", "fsl,sec1.0" (SEC1)
+warning: SEC1 and SEC2 are mutually exclusive
 - reg : Offset and length of the register set for the device
 - interrupts : the SEC's interrupt number
 - fsl,num-channels : An integer representing the number of channels
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 00/17] crypto: talitos - Add support for SEC1

2015-04-17 Thread Christophe Leroy
The purpose of this patch set is to add support to the talitos crypto
driver for the SEC1 version of the security engine, which is
found in mpc885 and mpc8272 processors.

v3 is a complete rework of the patchset. Since a kernel can be built
with support for both MPC82xx and MPC83xx at the same time, talitos
driver shall support both SEC1 and SEC2+ at the same time.

Based on cryptodev-2.6 tree

Christophe Leroy (17):
  crypto: talitos - Use zero entry to init descriptors ptrs to zero
  crypto: talitos - Refactor the sg in/out chain allocation
  crypto: talitos - talitos_ptr renamed ptr for more lisibility
  crypto: talitos - Add a helper function to clear j_extent field
  crypto: talitos - remove param 'extent' in map_single_talitos_ptr()
  crypto: talitos - helper function for ptr len
  crypto: talitos - enhanced talitos_desc struct for SEC1
  crypto: talitos - add sub-choice in talitos CONFIG for SEC1
  crypto: talitos - Add a feature to tag SEC1
  crypto: talitos - fill in talitos descriptor iaw SEC1 or SEC2+
  crypto: talitos - adaptation of talitos_submit() for SEC1
  crypto: talitos - base address for Execution Units
  crypto: talitos - adapt interrupts and reset functions to SEC1
  crypto: talitos - implement scatter/gather copy for SEC1
  crypto: talitos - SEC1 bugs on 0 data hash
  crypto: talitos - Add fsl,sec1.0 compatible
  crypto: talitos - Update DT bindings with SEC1

 .../devicetree/bindings/crypto/fsl-sec2.txt|   6 +-
 drivers/crypto/Kconfig |  18 +
 drivers/crypto/talitos.c   | 727 +++--
 drivers/crypto/talitos.h   | 153 +++--
 4 files changed, 644 insertions(+), 260 deletions(-)

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 01/17] crypto: talitos - Use zero entry to init descriptors ptrs to zero

2015-04-17 Thread Christophe Leroy
Use the zero_entry value to initialize the descriptor pointers to zero
instead of writing 0 to each field.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 857414a..7bf1b2b 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1373,9 +1373,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
int sg_count, ret;
 
/* first DWORD empty */
-   desc->ptr[0].len = 0;
-   to_talitos_ptr(&desc->ptr[0], 0);
-   desc->ptr[0].j_extent = 0;
+   desc->ptr[0] = zero_entry;
 
/* cipher iv */
to_talitos_ptr(&desc->ptr[1], edesc->iv_dma);
@@ -1445,9 +1443,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
   DMA_FROM_DEVICE);
 
/* last DWORD empty */
-   desc->ptr[6].len = 0;
-   to_talitos_ptr(&desc->ptr[6], 0);
-   desc->ptr[6].j_extent = 0;
+   desc->ptr[6] = zero_entry;
 
ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
if (ret != -EINPROGRESS) {
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 02/17] crypto: talitos - Refactor the sg in/out chain allocation

2015-04-17 Thread Christophe Leroy
This patch refactors the handling of the input and output data, which is
quite similar across several functions.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 159 ---
 1 file changed, 81 insertions(+), 78 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 7bf1b2b..5a7e345 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1327,16 +1327,23 @@ static int ablkcipher_setkey(struct crypto_ablkcipher 
*cipher,
return 0;
 }
 
+static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src,
+struct scatterlist *dst, unsigned int len,
+struct talitos_edesc *edesc)
+{
+   talitos_sg_unmap(dev, edesc, src, dst);
+}
+
 static void common_nonsnoop_unmap(struct device *dev,
  struct talitos_edesc *edesc,
  struct ablkcipher_request *areq)
 {
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE);
+
+   unmap_sg_talitos_ptr(dev, areq->src, areq->dst, areq->nbytes, edesc);
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE);
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE);
 
-   talitos_sg_unmap(dev, edesc, areq->src, areq->dst);
-
if (edesc->dma_len)
dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len,
 DMA_BIDIRECTIONAL);
@@ -1358,6 +1365,65 @@ static void ablkcipher_done(struct device *dev,
areq->base.complete(&areq->base, err);
 }
 
+int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src,
+ unsigned int len, struct talitos_edesc *edesc,
+ enum dma_data_direction dir, struct talitos_ptr *ptr)
+{
+   int sg_count;
+
+   ptr->len = cpu_to_be16(len);
+   ptr->j_extent = 0;
+
+   sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir,
+ edesc->src_chained);
+
+   if (sg_count == 1) {
+   to_talitos_ptr(ptr, sg_dma_address(src));
+   } else {
+   sg_count = sg_to_link_tbl(src, sg_count, len,
+ &edesc->link_tbl[0]);
+   if (sg_count > 1) {
+   to_talitos_ptr(ptr, edesc->dma_link_tbl);
+   ptr->j_extent |= DESC_PTR_LNKTBL_JUMP;
+   dma_sync_single_for_device(dev, edesc->dma_link_tbl,
+  edesc->dma_len,
+  DMA_BIDIRECTIONAL);
+   } else {
+   /* Only one segment now, so no link tbl needed */
+   to_talitos_ptr(ptr, sg_dma_address(src));
+   }
+   }
+   return sg_count;
+}
+
+void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst,
+   unsigned int len, struct talitos_edesc *edesc,
+   enum dma_data_direction dir,
+   struct talitos_ptr *ptr, int sg_count)
+{
+   ptr->len = cpu_to_be16(len);
+   ptr->j_extent = 0;
+
+   if (dir != DMA_NONE)
+   sg_count = talitos_map_sg(dev, dst, edesc->dst_nents ? : 1,
+ dir, edesc->dst_chained);
+
+   if (sg_count == 1) {
+   to_talitos_ptr(ptr, sg_dma_address(dst));
+   } else {
+   struct talitos_ptr *link_tbl_ptr =
+   &edesc->link_tbl[edesc->src_nents + 1];
+
+   to_talitos_ptr(ptr, edesc->dma_link_tbl +
+ (edesc->src_nents + 1) *
+ sizeof(struct talitos_ptr));
+   ptr->j_extent |= DESC_PTR_LNKTBL_JUMP;
+   sg_count = sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr);
+   dma_sync_single_for_device(dev, edesc->dma_link_tbl,
+  edesc->dma_len, DMA_BIDIRECTIONAL);
+   }
+}
+
 static int common_nonsnoop(struct talitos_edesc *edesc,
   struct ablkcipher_request *areq,
   void (*callback) (struct device *dev,
@@ -1387,56 +1453,16 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
/*
 * cipher in
 */
-   desc->ptr[3].len = cpu_to_be16(cryptlen);
-   desc->ptr[3].j_extent = 0;
-
-   sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1,
- (areq->src == areq->dst) ? DMA_BIDIRECTIONAL
-  : DMA_TO_DEVICE,
- edesc->src_chained);

[PATCH v3 03/17] crypto: talitos - talitos_ptr renamed ptr for more lisibility

2015-04-17 Thread Christophe Leroy
Linux CodingStyle recommends using short names for local
variables. ptr is good enough for those 3-line functions.
It helps keep single lines shorter than 80 characters.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 5a7e345..fca0aed 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -55,37 +55,37 @@
 
 #include "talitos.h"
 
-static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t 
dma_addr)
+static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr)
 {
-   talitos_ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr));
-   talitos_ptr->eptr = upper_32_bits(dma_addr);
+   ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr));
+   ptr->eptr = upper_32_bits(dma_addr);
 }
 
 /*
  * map virtual single (contiguous) pointer to h/w descriptor pointer
  */
 static void map_single_talitos_ptr(struct device *dev,
-  struct talitos_ptr *talitos_ptr,
+  struct talitos_ptr *ptr,
   unsigned short len, void *data,
   unsigned char extent,
   enum dma_data_direction dir)
 {
dma_addr_t dma_addr = dma_map_single(dev, data, len, dir);
 
-   talitos_ptr->len = cpu_to_be16(len);
-   to_talitos_ptr(talitos_ptr, dma_addr);
-   talitos_ptr->j_extent = extent;
+   ptr->len = cpu_to_be16(len);
+   to_talitos_ptr(ptr, dma_addr);
+   ptr->j_extent = extent;
 }
 
 /*
  * unmap bus single (contiguous) h/w descriptor pointer
  */
 static void unmap_single_talitos_ptr(struct device *dev,
-struct talitos_ptr *talitos_ptr,
+struct talitos_ptr *ptr,
 enum dma_data_direction dir)
 {
-   dma_unmap_single(dev, be32_to_cpu(talitos_ptr->ptr),
-be16_to_cpu(talitos_ptr->len), dir);
+   dma_unmap_single(dev, be32_to_cpu(ptr->ptr),
+be16_to_cpu(ptr->len), dir);
 }
 
 static int reset_channel(struct device *dev, int ch)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 04/17] crypto: talitos - Add a helper function to clear j_extent field

2015-04-17 Thread Christophe Leroy
The j_extent field is specific to SEC2, so we add a helper function to
clear it so that SEC1 can redefine that function as a no-op.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index fca0aed..c93f79b 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -61,6 +61,11 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, 
dma_addr_t dma_addr)
ptr->eptr = upper_32_bits(dma_addr);
 }
 
+static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr)
+{
+   ptr->j_extent = 0;
+}
+
 /*
  * map virtual single (contiguous) pointer to h/w descriptor pointer
  */
@@ -1372,7 +1377,7 @@ int map_sg_in_talitos_ptr(struct device *dev, struct 
scatterlist *src,
int sg_count;
 
ptr->len = cpu_to_be16(len);
-   ptr->j_extent = 0;
+   to_talitos_ptr_extent_clear(ptr);
 
sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir,
  edesc->src_chained);
@@ -1402,7 +1407,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct 
scatterlist *dst,
struct talitos_ptr *ptr, int sg_count)
 {
ptr->len = cpu_to_be16(len);
-   ptr->j_extent = 0;
+   to_talitos_ptr_extent_clear(ptr);
 
if (dir != DMA_NONE)
sg_count = talitos_map_sg(dev, dst, edesc->dst_nents ? : 1,
@@ -1444,7 +1449,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
/* cipher iv */
to_talitos_ptr(&desc->ptr[1], edesc->iv_dma);
desc->ptr[1].len = cpu_to_be16(ivsize);
-   desc->ptr[1].j_extent = 0;
+   to_talitos_ptr_extent_clear(&desc->ptr[1]);
 
/* cipher key */
map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen,
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 05/17] crypto: talitos - remove param 'extent' in map_single_talitos_ptr()

2015-04-17 Thread Christophe Leroy
map_single_talitos_ptr() is always called with extent == 0, so let's
remove this unused parameter.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 21 ++---
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index c93f79b..81e5636 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -72,14 +72,13 @@ static void to_talitos_ptr_extent_clear(struct talitos_ptr 
*ptr)
 static void map_single_talitos_ptr(struct device *dev,
   struct talitos_ptr *ptr,
   unsigned short len, void *data,
-  unsigned char extent,
   enum dma_data_direction dir)
 {
dma_addr_t dma_addr = dma_map_single(dev, data, len, dir);
 
ptr->len = cpu_to_be16(len);
to_talitos_ptr(ptr, dma_addr);
-   ptr->j_extent = extent;
+   to_talitos_ptr_extent_clear(ptr);
 }
 
 /*
@@ -958,7 +957,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
 
/* hmac key */
map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key,
-  0, DMA_TO_DEVICE);
+  DMA_TO_DEVICE);
 
/* hmac data */
desc->ptr[1].len = cpu_to_be16(areq->assoclen + ivsize);
@@ -1002,7 +1001,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
 
/* cipher key */
map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen,
-  (char *)&ctx->key + ctx->authkeylen, 0,
+  (char *)&ctx->key + ctx->authkeylen,
   DMA_TO_DEVICE);
 
/*
@@ -1080,7 +1079,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
}
 
/* iv out */
-   map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, 0,
+   map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv,
   DMA_FROM_DEVICE);
 
ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
@@ -1453,7 +1452,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
 
/* cipher key */
map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen,
-  (char *)&ctx->key, 0, DMA_TO_DEVICE);
+  (char *)&ctx->key, DMA_TO_DEVICE);
 
/*
 * cipher in
@@ -1470,7 +1469,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
   &desc->ptr[4], sg_count);
 
/* iv out */
-   map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, 0,
+   map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv,
   DMA_FROM_DEVICE);
 
/* last DWORD empty */
@@ -1595,7 +1594,7 @@ static int common_nonsnoop_hash(struct talitos_edesc 
*edesc,
if (!req_ctx->first || req_ctx->swinit) {
map_single_talitos_ptr(dev, &desc->ptr[1],
   req_ctx->hw_context_size,
-  (char *)req_ctx->hw_context, 0,
+  (char *)req_ctx->hw_context,
   DMA_TO_DEVICE);
req_ctx->swinit = 0;
} else {
@@ -1607,7 +1606,7 @@ static int common_nonsnoop_hash(struct talitos_edesc 
*edesc,
/* HMAC key */
if (ctx->keylen)
map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen,
-  (char *)&ctx->key, 0, DMA_TO_DEVICE);
+  (char *)&ctx->key, DMA_TO_DEVICE);
else
desc->ptr[2] = zero_entry;
 
@@ -1624,11 +1623,11 @@ static int common_nonsnoop_hash(struct talitos_edesc 
*edesc,
if (req_ctx->last)
map_single_talitos_ptr(dev, &desc->ptr[5],
   crypto_ahash_digestsize(tfm),
-  areq->result, 0, DMA_FROM_DEVICE);
+  areq->result, DMA_FROM_DEVICE);
else
map_single_talitos_ptr(dev, &desc->ptr[5],
   req_ctx->hw_context_size,
-  req_ctx->hw_context, 0, DMA_FROM_DEVICE);
+  req_ctx->hw_context, DMA_FROM_DEVICE);
 
/* last DWORD empty */
desc->ptr[6] = zero_entry;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 06/17] crypto: talitos - helper function for ptr len

2015-04-17 Thread Christophe Leroy
This patch adds a helper function for reads and writes of the len
param of the talitos descriptor. This will help implement
SEC1 later.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 24 +---
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 81e5636..bca6ded 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -61,6 +61,16 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, 
dma_addr_t dma_addr)
ptr->eptr = upper_32_bits(dma_addr);
 }
 
+static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned short len)
+{
+   ptr->len = cpu_to_be16(len);
+}
+
+static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr)
+{
+   return be16_to_cpu(ptr->len);
+}
+
 static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr)
 {
ptr->j_extent = 0;
@@ -76,7 +86,7 @@ static void map_single_talitos_ptr(struct device *dev,
 {
dma_addr_t dma_addr = dma_map_single(dev, data, len, dir);
 
-   ptr->len = cpu_to_be16(len);
+   to_talitos_ptr_len(ptr, len);
to_talitos_ptr(ptr, dma_addr);
to_talitos_ptr_extent_clear(ptr);
 }
@@ -89,7 +99,7 @@ static void unmap_single_talitos_ptr(struct device *dev,
 enum dma_data_direction dir)
 {
dma_unmap_single(dev, be32_to_cpu(ptr->ptr),
-be16_to_cpu(ptr->len), dir);
+from_talitos_ptr_len(ptr), dir);
 }
 
 static int reset_channel(struct device *dev, int ch)
@@ -1375,7 +1385,7 @@ int map_sg_in_talitos_ptr(struct device *dev, struct 
scatterlist *src,
 {
int sg_count;
 
-   ptr->len = cpu_to_be16(len);
+   to_talitos_ptr_len(ptr, len);
to_talitos_ptr_extent_clear(ptr);
 
sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir,
@@ -1405,7 +1415,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct 
scatterlist *dst,
enum dma_data_direction dir,
struct talitos_ptr *ptr, int sg_count)
 {
-   ptr->len = cpu_to_be16(len);
+   to_talitos_ptr_len(ptr, len);
to_talitos_ptr_extent_clear(ptr);
 
if (dir != DMA_NONE)
@@ -1447,7 +1457,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc,
 
/* cipher iv */
to_talitos_ptr(&desc->ptr[1], edesc->iv_dma);
-   desc->ptr[1].len = cpu_to_be16(ivsize);
+   to_talitos_ptr_len(&desc->ptr[1], ivsize);
to_talitos_ptr_extent_clear(&desc->ptr[1]);
 
/* cipher key */
@@ -1539,11 +1549,11 @@ static void common_nonsnoop_hash_unmap(struct device 
*dev,
unmap_sg_talitos_ptr(dev, req_ctx->psrc, NULL, 0, edesc);
 
/* When using hashctx-in, must unmap it. */
-   if (edesc->desc.ptr[1].len)
+   if (from_talitos_ptr_len(&edesc->desc.ptr[1]))
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1],
 DMA_TO_DEVICE);
 
-   if (edesc->desc.ptr[2].len)
+   if (from_talitos_ptr_len(&edesc->desc.ptr[2]))
unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2],
 DMA_TO_DEVICE);
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 07/17] crypto: talitos - enhanced talitos_desc struct for SEC1

2015-04-17 Thread Christophe Leroy
This patch enhances the talitos_desc struct with fields for SEC1.
SEC1 has only one header field, and has a 'next_desc' field in
addition.
This mixed descriptor will continue to fit SEC2, and for SEC1
we will copy the hdr value into hdr1 in talitos_submit().

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.h | 20 
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index 61a1405..f078da1 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -37,9 +37,17 @@
 
 /* descriptor pointer entry */
 struct talitos_ptr {
-   __be16 len; /* length */
-   u8 j_extent;/* jump to sg link table and/or extent */
-   u8 eptr;/* extended address */
+   union {
+   struct {/* SEC2 format */
+   __be16 len; /* length */
+   u8 j_extent;/* jump to sg link table and/or extent*/
+   u8 eptr;/* extended address */
+   };
+   struct {/* SEC1 format */
+   __be16 res;
+   __be16 len1;/* length */
+   };
+   };
__be32 ptr; /* address */
 };
 
@@ -53,8 +61,12 @@ static const struct talitos_ptr zero_entry = {
 /* descriptor */
 struct talitos_desc {
__be32 hdr; /* header high bits */
-   __be32 hdr_lo;  /* header low bits */
+   union {
+   __be32 hdr_lo;  /* header low bits */
+   __be32 hdr1;/* header for SEC1 */
+   };
struct talitos_ptr ptr[7];  /* ptr/len pair array */
+   __be32 next_desc;   /* next descriptor (SEC1) */
 };
 
 /**
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 08/17] crypto: talitos - add sub-choice in talitos CONFIG for SEC1

2015-04-17 Thread Christophe Leroy
This patch adds a CONFIG option to select SEC1, SEC2+ or both.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/Kconfig | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 800bf41..8a76a01 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -222,6 +222,24 @@ config CRYPTO_DEV_TALITOS
  To compile this driver as a module, choose M here: the module
  will be called talitos.
 
+config CRYPTO_DEV_TALITOS1
+   bool "SEC1 (SEC 1.0 and SEC Lite 1.2)"
+   depends on CRYPTO_DEV_TALITOS
+   depends on PPC_8xx || PPC_82xx
+   default y
+   help
+ Say 'Y' here to use the Freescale Security Engine (SEC) version 1.0
+ found on MPC82xx or the Freescale Security Engine (SEC Lite)
+ version 1.2 found on MPC8xx
+
+config CRYPTO_DEV_TALITOS2
+   bool "SEC2+ (SEC version 2.0 or upper)"
+   depends on CRYPTO_DEV_TALITOS
+   default y if !PPC_8xx
+   help
+ Say 'Y' here to use the Freescale Security Engine (SEC)
+ version 2 and following as found on MPC83xx, MPC85xx, etc ...
+
 config CRYPTO_DEV_IXP4XX
tristate "Driver for IXP4xx crypto hardware acceleration"
depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 09/17] crypto: talitos - Add a feature to tag SEC1

2015-04-17 Thread Christophe Leroy
We add a new feature in the features field, to mark compatible
"fsl,sec1.0"
We also define a helper function called has_ftr_sec1() to help
functions quickly determine if they are running on SEC1 or SEC2+.
When only SEC1 or SEC2 is compiled in, has_ftr_sec1() returns the
corresponding constant value. If both are compiled in, the features
field is checked.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c |  3 +++
 drivers/crypto/talitos.h | 17 +
 2 files changed, 20 insertions(+)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index bca6ded..db95023 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -2709,6 +2709,9 @@ static int talitos_probe(struct platform_device *ofdev)
  TALITOS_FTR_SHA224_HWINIT |
  TALITOS_FTR_HMAC_OK;
 
+   if (of_device_is_compatible(np, "fsl,sec1.0"))
+   priv->features |= TALITOS_FTR_SEC1;
+
priv->chan = kzalloc(sizeof(struct talitos_channel) *
 priv->num_channels, GFP_KERNEL);
if (!priv->chan) {
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index f078da1..b0bdb4e 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -156,6 +156,23 @@ extern int talitos_submit(struct device *dev, int ch, 
struct talitos_desc *desc,
 #define TALITOS_FTR_HW_AUTH_CHECK 0x0002
 #define TALITOS_FTR_SHA224_HWINIT 0x0004
 #define TALITOS_FTR_HMAC_OK 0x0008
+#define TALITOS_FTR_SEC1 0x0010
+
+/*
+ * If both CONFIG_CRYPTO_DEV_TALITOS1 and CONFIG_CRYPTO_DEV_TALITOS2 are
+ * defined, we check the features which are set according to the device tree.
+ * Otherwise, we answer true or false directly
+ */
+static inline bool has_ftr_sec1(struct talitos_private *priv)
+{
+#if defined(CONFIG_CRYPTO_DEV_TALITOS1) && defined(CONFIG_CRYPTO_DEV_TALITOS2)
+   return priv->features & TALITOS_FTR_SEC1 ? true : false;
+#elif defined(CONFIG_CRYPTO_DEV_TALITOS1)
+   return true;
+#else
+   return false;
+#endif
+}
 
 /*
  * TALITOS_xxx_LO addresses point to the low data bits (32-63) of the register
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 10/17] crypto: talitos - fill in talitos descriptor iaw SEC1 or SEC2+

2015-04-17 Thread Christophe Leroy
The talitos descriptor is slightly different for SEC1 and SEC2+, so
let the helper functions that fill the descriptor take into account
the type of SEC.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 105 ++-
 1 file changed, 67 insertions(+), 38 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index db95023..678b528 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -55,25 +55,38 @@
 
 #include "talitos.h"
 
-static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr)
+static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr,
+  bool is_sec1)
 {
ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr));
-   ptr->eptr = upper_32_bits(dma_addr);
+   if (!is_sec1)
+   ptr->eptr = upper_32_bits(dma_addr);
 }
 
-static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned short len)
+static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned short len,
+  bool is_sec1)
 {
-   ptr->len = cpu_to_be16(len);
+   if (is_sec1) {
+   ptr->res = 0;
+   ptr->len1 = cpu_to_be16(len);
+   } else {
+   ptr->len = cpu_to_be16(len);
+   }
 }
 
-static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr)
+static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr,
+  bool is_sec1)
 {
-   return be16_to_cpu(ptr->len);
+   if (is_sec1)
+   return be16_to_cpu(ptr->len1);
+   else
+   return be16_to_cpu(ptr->len);
 }
 
-static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr)
+static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr, bool is_sec1)
 {
-   ptr->j_extent = 0;
+   if (!is_sec1)
+   ptr->j_extent = 0;
 }
 
 /*
@@ -85,10 +98,12 @@ static void map_single_talitos_ptr(struct device *dev,
   enum dma_data_direction dir)
 {
dma_addr_t dma_addr = dma_map_single(dev, data, len, dir);
+   struct talitos_private *priv = dev_get_drvdata(dev);
+   bool is_sec1 = has_ftr_sec1(priv);
 
-   to_talitos_ptr_len(ptr, len);
-   to_talitos_ptr(ptr, dma_addr);
-   to_talitos_ptr_extent_clear(ptr);
+   to_talitos_ptr_len(ptr, len, is_sec1);
+   to_talitos_ptr(ptr, dma_addr, is_sec1);
+   to_talitos_ptr_extent_clear(ptr, is_sec1);
 }
 
 /*
@@ -98,8 +113,11 @@ static void unmap_single_talitos_ptr(struct device *dev,
 struct talitos_ptr *ptr,
 enum dma_data_direction dir)
 {
+   struct talitos_private *priv = dev_get_drvdata(dev);
+   bool is_sec1 = has_ftr_sec1(priv);
+
dma_unmap_single(dev, be32_to_cpu(ptr->ptr),
-from_talitos_ptr_len(ptr), dir);
+from_talitos_ptr_len(ptr, is_sec1), dir);
 }
 
 static int reset_channel(struct device *dev, int ch)
@@ -922,7 +940,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int 
sg_count,
int n_sg = sg_count;
 
while (n_sg--) {
-   to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg));
+   to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg), 0);
link_tbl_ptr->len = cpu_to_be16(sg_dma_len(sg));
link_tbl_ptr->j_extent = 0;
link_tbl_ptr++;
@@ -976,7 +994,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off];
 
to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off *
-  sizeof(struct talitos_ptr));
+  sizeof(struct talitos_ptr), 0);
desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP;
 
/* assoc_nents - 1 entries for assoc, 1 for IV */
@@ -987,7 +1005,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
tbl_ptr += sg_count - 1;
tbl_ptr->j_extent = 0;
tbl_ptr++;
-   to_talitos_ptr(tbl_ptr, edesc->iv_dma);
+   to_talitos_ptr(tbl_ptr, edesc->iv_dma, 0);
tbl_ptr->len = cpu_to_be16(ivsize);
tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN;
 
@@ -996,14 +1014,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct 
aead_request *areq,
} else {
if (areq->assoclen)
to_talitos_ptr(&desc->ptr[1],
-  sg_dma_address(areq->assoc));
+  sg_dma_address(areq->assoc), 0);
else
-   to_talitos_ptr(&desc->ptr[1], edesc->iv_dma)

[PATCH v3 11/17] crypto: talitos - adaptation of talitos_submit() for SEC1

2015-04-17 Thread Christophe Leroy
SEC1 descriptor is a bit different to SEC2+ descriptor.
talitos_submit() will have to copy hdr field into hdr1 field and
send the descriptor starting at hdr1 up to next_desc.
For SEC2, it remains unchanged and next_desc is just ignored.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 23 +++
 drivers/crypto/talitos.h |  2 ++
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 678b528..e6ea651 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -236,6 +236,7 @@ int talitos_submit(struct device *dev, int ch, struct 
talitos_desc *desc,
struct talitos_request *request;
unsigned long flags;
int head;
+   bool is_sec1 = has_ftr_sec1(priv);
 
spin_lock_irqsave(&priv->chan[ch].head_lock, flags);
 
@@ -249,8 +250,17 @@ int talitos_submit(struct device *dev, int ch, struct 
talitos_desc *desc,
request = &priv->chan[ch].fifo[head];
 
/* map descriptor and save caller data */
-   request->dma_desc = dma_map_single(dev, desc, sizeof(*desc),
-  DMA_BIDIRECTIONAL);
+   if (is_sec1) {
+   desc->hdr1 = desc->hdr;
+   desc->next_desc = 0;
+   request->dma_desc = dma_map_single(dev, &desc->hdr1,
+  TALITOS_DESC_SIZE,
+  DMA_BIDIRECTIONAL);
+   } else {
+   request->dma_desc = dma_map_single(dev, desc,
+  TALITOS_DESC_SIZE,
+  DMA_BIDIRECTIONAL);
+   }
request->callback = callback;
request->context = context;
 
@@ -282,16 +292,21 @@ static void flush_channel(struct device *dev, int ch, int 
error, int reset_ch)
struct talitos_request *request, saved_req;
unsigned long flags;
int tail, status;
+   bool is_sec1 = has_ftr_sec1(priv);
 
spin_lock_irqsave(&priv->chan[ch].tail_lock, flags);
 
tail = priv->chan[ch].tail;
while (priv->chan[ch].fifo[tail].desc) {
+   __be32 hdr;
+
request = &priv->chan[ch].fifo[tail];
 
/* descriptors with their done bits set don't get the error */
rmb();
-   if ((request->desc->hdr & DESC_HDR_DONE) == DESC_HDR_DONE)
+   hdr = is_sec1 ? request->desc->hdr1 : request->desc->hdr;
+
+   if ((hdr & DESC_HDR_DONE) == DESC_HDR_DONE)
status = 0;
else
if (!error)
@@ -300,7 +315,7 @@ static void flush_channel(struct device *dev, int ch, int 
error, int reset_ch)
status = error;
 
dma_unmap_single(dev, request->dma_desc,
-sizeof(struct talitos_desc),
+TALITOS_DESC_SIZE,
 DMA_BIDIRECTIONAL);
 
/* copy entries so we can call callback outside lock */
diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index b0bdb4e..f827c04 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -69,6 +69,8 @@ struct talitos_desc {
__be32 next_desc;   /* next descriptor (SEC1) */
 };
 
+#define TALITOS_DESC_SIZE  (sizeof(struct talitos_desc) - sizeof(__be32))
+
 /**
  * talitos_request - descriptor submission request
  * @desc: descriptor pointer (kernel virtual)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 12/17] crypto: talitos - base address for Execution Units

2015-04-17 Thread Christophe Leroy
SEC 1.0, 1.2 and 2.x+ have different EU base addresses, so we need to
define pointers for each EU in the driver private data structure.
The proper address is set by the probe function depending on the
SEC type, in order to provide access to the proper address.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 83 
 drivers/crypto/talitos.h | 72 +
 2 files changed, 100 insertions(+), 55 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index e6ea651..6d77699 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -208,7 +208,7 @@ static int init_device(struct device *dev)
 
/* disable integrity check error interrupts (use writeback instead) */
if (priv->features & TALITOS_FTR_HW_AUTH_CHECK)
-   setbits32(priv->reg + TALITOS_MDEUICR_LO,
+   setbits32(priv->reg_mdeu + TALITOS_EUICR_LO,
  TALITOS_MDEUICR_LO_ICE);
 
return 0;
@@ -424,44 +424,44 @@ static void report_eu_error(struct device *dev, int ch, 
u32 desc_hdr)
switch (desc_hdr & DESC_HDR_SEL0_MASK) {
case DESC_HDR_SEL0_AFEU:
dev_err(dev, "AFEUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_AFEUISR),
-   in_be32(priv->reg + TALITOS_AFEUISR_LO));
+   in_be32(priv->reg_afeu + TALITOS_EUISR),
+   in_be32(priv->reg_afeu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_DEU:
dev_err(dev, "DEUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_DEUISR),
-   in_be32(priv->reg + TALITOS_DEUISR_LO));
+   in_be32(priv->reg_deu + TALITOS_EUISR),
+   in_be32(priv->reg_deu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_MDEUA:
case DESC_HDR_SEL0_MDEUB:
dev_err(dev, "MDEUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_MDEUISR),
-   in_be32(priv->reg + TALITOS_MDEUISR_LO));
+   in_be32(priv->reg_mdeu + TALITOS_EUISR),
+   in_be32(priv->reg_mdeu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_RNG:
dev_err(dev, "RNGUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_RNGUISR),
-   in_be32(priv->reg + TALITOS_RNGUISR_LO));
+   in_be32(priv->reg_rngu + TALITOS_ISR),
+   in_be32(priv->reg_rngu + TALITOS_ISR_LO));
break;
case DESC_HDR_SEL0_PKEU:
dev_err(dev, "PKEUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_PKEUISR),
-   in_be32(priv->reg + TALITOS_PKEUISR_LO));
+   in_be32(priv->reg_pkeu + TALITOS_EUISR),
+   in_be32(priv->reg_pkeu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_AESU:
dev_err(dev, "AESUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_AESUISR),
-   in_be32(priv->reg + TALITOS_AESUISR_LO));
+   in_be32(priv->reg_aesu + TALITOS_EUISR),
+   in_be32(priv->reg_aesu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_CRCU:
dev_err(dev, "CRCUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_CRCUISR),
-   in_be32(priv->reg + TALITOS_CRCUISR_LO));
+   in_be32(priv->reg_crcu + TALITOS_EUISR),
+   in_be32(priv->reg_crcu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL0_KEU:
dev_err(dev, "KEUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_KEUISR),
-   in_be32(priv->reg + TALITOS_KEUISR_LO));
+   in_be32(priv->reg_pkeu + TALITOS_EUISR),
+   in_be32(priv->reg_pkeu + TALITOS_EUISR_LO));
break;
}
 
@@ -469,13 +469,13 @@ static void report_eu_error(struct device *dev, int ch, 
u32 desc_hdr)
case DESC_HDR_SEL1_MDEUA:
case DESC_HDR_SEL1_MDEUB:
dev_err(dev, "MDEUISR 0x%08x_%08x\n",
-   in_be32(priv->reg + TALITOS_MDEUISR),
-   in_be32(priv->reg + TALITOS_MDEUISR_LO));
+   in_be32(priv->reg_mdeu + TALITOS_EUISR),
+   in_be32(priv->reg_mdeu + TALITOS_EUISR_LO));
break;
case DESC_HDR_SEL1_CRCU:
dev_err(dev, 

[PATCH v3 13/17] crypto: talitos - adapt interrupts and reset functions to SEC1

2015-04-17 Thread Christophe Leroy
This patch adapts the interrupt handling and reset functions for
SEC1. On SEC1, registers are similar to those of SEC2+, but bits
are sometimes located at different places. So we need to define
TALITOS1 and TALITOS2 versions of some fields, and handle them
according to whether it is SEC1 or SEC2.

On SEC1, only one interrupt vector is dedicated to the SEC, so only
interrupt_4ch is needed.

On SEC1, interrupts are enabled by clearing related bits in IMR,
while on SEC2, interrupts are enabled by setting the bits in IMR.

SEC1 also performs parity verification in the DES Unit. We have
to disable this feature because the test vectors provided in
the kernel have parity errors.

In reset functions, only SEC2 supports continuation after error.
For SEC1, we have to reset in all cases.

For error handling, SEC2+ names have been kept, but the displayed
text has been amended to reflect the exact meaning on SEC1.

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 227 +++
 drivers/crypto/talitos.h |  39 +---
 2 files changed, 199 insertions(+), 67 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 6d77699..1265405 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -124,12 +124,23 @@ static int reset_channel(struct device *dev, int ch)
 {
struct talitos_private *priv = dev_get_drvdata(dev);
unsigned int timeout = TALITOS_TIMEOUT;
+   bool is_sec1 = has_ftr_sec1(priv);
 
-   setbits32(priv->chan[ch].reg + TALITOS_CCCR, TALITOS_CCCR_RESET);
+   if (is_sec1) {
+   setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO,
+ TALITOS1_CCCR_LO_RESET);
 
-   while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & TALITOS_CCCR_RESET)
-  && --timeout)
-   cpu_relax();
+   while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR_LO) &
+   TALITOS1_CCCR_LO_RESET) && --timeout)
+   cpu_relax();
+   } else {
+   setbits32(priv->chan[ch].reg + TALITOS_CCCR,
+ TALITOS2_CCCR_RESET);
+
+   while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) &
+   TALITOS2_CCCR_RESET) && --timeout)
+   cpu_relax();
+   }
 
if (timeout == 0) {
dev_err(dev, "failed to reset channel %d\n", ch);
@@ -152,11 +163,12 @@ static int reset_device(struct device *dev)
 {
struct talitos_private *priv = dev_get_drvdata(dev);
unsigned int timeout = TALITOS_TIMEOUT;
-   u32 mcr = TALITOS_MCR_SWR;
+   bool is_sec1 = has_ftr_sec1(priv);
+   u32 mcr = is_sec1 ? TALITOS1_MCR_SWR : TALITOS2_MCR_SWR;
 
setbits32(priv->reg + TALITOS_MCR, mcr);
 
-   while ((in_be32(priv->reg + TALITOS_MCR) & TALITOS_MCR_SWR)
+   while ((in_be32(priv->reg + TALITOS_MCR) & mcr)
   && --timeout)
cpu_relax();
 
@@ -180,6 +192,7 @@ static int init_device(struct device *dev)
 {
struct talitos_private *priv = dev_get_drvdata(dev);
int ch, err;
+   bool is_sec1 = has_ftr_sec1(priv);
 
/*
 * Master reset
@@ -203,8 +216,15 @@ static int init_device(struct device *dev)
}
 
/* enable channel done and error interrupts */
-   setbits32(priv->reg + TALITOS_IMR, TALITOS_IMR_INIT);
-   setbits32(priv->reg + TALITOS_IMR_LO, TALITOS_IMR_LO_INIT);
+   if (is_sec1) {
+   clrbits32(priv->reg + TALITOS_IMR, TALITOS1_IMR_INIT);
+   clrbits32(priv->reg + TALITOS_IMR_LO, TALITOS1_IMR_LO_INIT);
+   /* disable parity error check in DEU (erroneous? test vect.) */
+   setbits32(priv->reg_deu + TALITOS_EUICR, TALITOS1_DEUICR_KPE);
+   } else {
+   setbits32(priv->reg + TALITOS_IMR, TALITOS2_IMR_INIT);
+   setbits32(priv->reg + TALITOS_IMR_LO, TALITOS2_IMR_LO_INIT);
+   }
 
/* disable integrity check error interrupts (use writeback instead) */
if (priv->features & TALITOS_FTR_HW_AUTH_CHECK)
@@ -349,8 +369,37 @@ static void flush_channel(struct device *dev, int ch, int 
error, int reset_ch)
 /*
  * process completed requests for channels that have done status
  */
-#define DEF_TALITOS_DONE(name, ch_done_mask)   \
-static void talitos_done_##name(unsigned long data)\
+#define DEF_TALITOS1_DONE(name, ch_done_mask)  \
+static void talitos1_done_##name(unsigned long data)   \
+{  \
+   struct device *dev = (struct device *)data; \
+   struct talitos_private *priv = dev_get_drvdata(dev);\
+   unsigned long flags;

[PATCH v3 14/17] crypto: talitos - implement scatter/gather copy for SEC1

2015-04-17 Thread Christophe Leroy
SEC1 doesn't support scatter/gather, SEC1 doesn't handle link tables.
Therefore, for SEC1 we have to do it by SW. For that, we reserve
space at the end of the extended descriptor, in lieu of the space
reserved for the link tables on SEC2, and we perform sg_copy() when
preparing the descriptors

We also adapt the max buffer size which is only 32k on SEC1 while it
is 64k on SEC2+

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 138 ++-
 drivers/crypto/talitos.h |   3 +-
 2 files changed, 103 insertions(+), 38 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index 1265405..dddf4b3 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -866,9 +866,10 @@ badkey:
  * @dst_chained: whether dst is chained or not
  * @iv_dma: dma address of iv for checking continuity and link table
  * @dma_len: length of dma mapped link_tbl space
- * @dma_link_tbl: bus physical address of link_tbl
+ * @dma_link_tbl: bus physical address of link_tbl/buf
  * @desc: h/w descriptor
- * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1)
+ * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) (SEC2)
+ * @buf: input and output buffeur (if {src,dst}_nents > 1) (SEC1)
  *
  * if decrypting (with authcheck), or either one of src_nents or dst_nents
  * is greater than 1, an integrity check value is concatenated to the end
@@ -885,7 +886,10 @@ struct talitos_edesc {
int dma_len;
dma_addr_t dma_link_tbl;
struct talitos_desc desc;
-   struct talitos_ptr link_tbl[0];
+   union {
+   struct talitos_ptr link_tbl[0];
+   u8 buf[0];
+   };
 };
 
 static int talitos_map_sg(struct device *dev, struct scatterlist *sg,
@@ -1282,8 +1286,11 @@ static struct talitos_edesc *talitos_edesc_alloc(struct 
device *dev,
dma_addr_t iv_dma = 0;
gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL :
  GFP_ATOMIC;
+   struct talitos_private *priv = dev_get_drvdata(dev);
+   bool is_sec1 = has_ftr_sec1(priv);
+   int max_len = is_sec1 ? TALITOS1_MAX_DATA_LEN : TALITOS2_MAX_DATA_LEN;
 
-   if (cryptlen + authsize > TALITOS_MAX_DATA_LEN) {
+   if (cryptlen + authsize > max_len) {
dev_err(dev, "length exceeds h/w max limit\n");
return ERR_PTR(-EINVAL);
}
@@ -1327,8 +1334,12 @@ static struct talitos_edesc *talitos_edesc_alloc(struct 
device *dev,
 */
alloc_len = sizeof(struct talitos_edesc);
if (assoc_nents || src_nents || dst_nents) {
-   dma_len = (src_nents + dst_nents + 2 + assoc_nents) *
- sizeof(struct talitos_ptr) + authsize;
+   if (is_sec1)
+   dma_len = src_nents ? cryptlen : 0 +
+ dst_nents ? cryptlen : 0;
+   else
+   dma_len = (src_nents + dst_nents + 2 + assoc_nents) *
+ sizeof(struct talitos_ptr) + authsize;
alloc_len += dma_len;
} else {
dma_len = 0;
@@ -1485,7 +1496,27 @@ static void unmap_sg_talitos_ptr(struct device *dev, 
struct scatterlist *src,
 struct scatterlist *dst, unsigned int len,
 struct talitos_edesc *edesc)
 {
-   talitos_sg_unmap(dev, edesc, src, dst);
+   struct talitos_private *priv = dev_get_drvdata(dev);
+   bool is_sec1 = has_ftr_sec1(priv);
+
+   if (is_sec1) {
+   if (!edesc->src_nents) {
+   dma_unmap_sg(dev, src, 1,
+dst != src ? DMA_TO_DEVICE
+   : DMA_BIDIRECTIONAL);
+   }
+   if (dst && edesc->dst_nents) {
+   dma_sync_single_for_device(dev,
+  edesc->dma_link_tbl + len,
+  len, DMA_FROM_DEVICE);
+   sg_copy_from_buffer(dst, edesc->dst_nents ? : 1,
+   edesc->buf + len, len);
+   } else if (dst && dst != src) {
+   dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE);
+   }
+   } else {
+   talitos_sg_unmap(dev, edesc, src, dst);
+   }
 }
 
 static void common_nonsnoop_unmap(struct device *dev,
@@ -1528,25 +1559,42 @@ int map_sg_in_talitos_ptr(struct device *dev, struct 
scatterlist *src,
bool is_sec1 = has_ftr_sec1(priv);
 
to_talitos_ptr_len(ptr, len, is_sec1);
-   to_talitos_ptr_extent_clear(ptr, is_sec1);
 
-   sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir,
- edesc->src_chained);
+   if (is_sec1) {
+ 

[PATCH v3 15/17] crypto: talitos - SEC1 bugs on 0 data hash

2015-04-17 Thread Christophe Leroy
SEC1 misbehaves when hashing a 0 sized message, so we submit an already
padded block representing 0 data

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index dddf4b3..f1406d7b 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1797,6 +1797,27 @@ static void ahash_done(struct device *dev,
areq->base.complete(&areq->base, err);
 }
 
+/*
+ * SEC1 doesn't like hashing of 0 sized message, so we do the padding
+ * ourself and submit a padded block
+ */
+void talitos_handle_buggy_hash(struct talitos_ctx *ctx,
+  struct talitos_edesc *edesc,
+  struct talitos_ptr *ptr)
+{
+   static u8 padded_hash[64] = {
+   0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   };
+
+   pr_err_once("Bug in SEC1, padding ourself\n");
+   edesc->desc.hdr &= ~DESC_HDR_MODE0_MDEU_PAD;
+   map_single_talitos_ptr(ctx->dev, ptr, sizeof(padded_hash),
+  (char *)padded_hash, DMA_TO_DEVICE);
+}
+
 static int common_nonsnoop_hash(struct talitos_edesc *edesc,
struct ahash_request *areq, unsigned int length,
void (*callback) (struct device *dev,
@@ -1857,6 +1878,9 @@ static int common_nonsnoop_hash(struct talitos_edesc 
*edesc,
/* last DWORD empty */
desc->ptr[6] = zero_entry;
 
+   if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0)
+   talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]);
+
ret = talitos_submit(dev, ctx->ch, desc, callback, areq);
if (ret != -EINPROGRESS) {
common_nonsnoop_hash_unmap(dev, edesc, areq);
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 16/17] crypto: talitos - Add fsl,sec1.0 compatible

2015-04-17 Thread Christophe Leroy
We add a specific compatible for SEC1, to handle the differences
between SEC1 and SEC2+

Signed-off-by: Christophe Leroy 
---
 drivers/crypto/talitos.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index f1406d7b..c04074d 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -3086,9 +3086,16 @@ err_out:
 }
 
 static const struct of_device_id talitos_match[] = {
+#ifdef CONFIG_CRYPTO_DEV_TALITOS1
+   {
+   .compatible = "fsl,sec1.0",
+   },
+#endif
+#ifdef CONFIG_CRYPTO_DEV_TALITOS2
{
.compatible = "fsl,sec2.0",
},
+#endif
{},
 };
 MODULE_DEVICE_TABLE(of, talitos_match);
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 17/17] crypto: talitos - Update DT bindings with SEC1

2015-04-17 Thread Christophe Leroy
This patch updates the documentation by including SEC1 into SEC2/3 doc

Signed-off-by: Christophe Leroy 
---
 Documentation/devicetree/bindings/crypto/fsl-sec2.txt | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt 
b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
index 38988ef..f0d926b 100644
--- a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
+++ b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt
@@ -1,9 +1,11 @@
-Freescale SoC SEC Security Engines versions 2.x-3.x
+Freescale SoC SEC Security Engines versions 1.x-2.x-3.x
 
 Required properties:
 
 - compatible : Should contain entries for this and backward compatible
-  SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0"
+  SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0" (SEC2/3)
+ e.g., "fsl,sec1.2", "fsl,sec1.0" (SEC1)
+warning: SEC1 and SEC2 are mutually exclusive
 - reg : Offset and length of the register set for the device
 - interrupts : the SEC's interrupt number
 - fsl,num-channels : An integer representing the number of channels
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/8xx: mmu_virtual_psize incorrect for 16k pages

2015-04-17 Thread Christophe Leroy
mmu_virtual_psize shall be set to MMU_PAGE_16K when 16k pages have
been selected

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/include/asm/mmu-8xx.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/mmu-8xx.h 
b/arch/powerpc/include/asm/mmu-8xx.h
index 986b9e1..d41200c 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -145,7 +145,14 @@ typedef struct {
 } mm_context_t;
 #endif /* !__ASSEMBLY__ */
 
+#if (PAGE_SHIFT == 12)
 #define mmu_virtual_psize  MMU_PAGE_4K
+#elif (PAGE_SHIFT == 14)
+#define mmu_virtual_psize  MMU_PAGE_16K
+#else
+#error "Unsupported PAGE_SIZE"
+#endif
+
 #define mmu_linear_psize   MMU_PAGE_8M
 
 #endif /* _ASM_POWERPC_MMU_8XX_H_ */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 00/11] powerpc8xx: Further optimisation of TLB handling

2015-04-19 Thread Christophe Leroy
This patchset provides a further optimisation of TLB handling in the 8xx.
Main changes are based on:
- Using processor handling of PGD/PTE Validity bits instead of testing ourselves
the entries validity
- Aligning PGD address to allow direct bit manipulation
- Not saving registers like CR when not needed

It also adds support for any TASK_SIZE

Patchset:
01 - powerpc/8xx: remove remaining unnecessary code in FixupDAR
02 - powerpc/8xx: remove tests on PGDIR entry validity
03 - powerpc32: Use kmem_cache memory for PGDIR
04 - powerpc/8xx: Take benefit of aligned PGDIR
05 - powerpc/8xx: Optimise access to swapper_pg_dir
06 - powerpc/8xx: Remove duplicated code in set_context()
07 - powerpc/8xx: macro for handling CPU15 errata
08 - powerpc/8xx: Handle CR out of exception PROLOG/EPILOG
09 - powerpc/8xx: dont save CR in SCRATCH registers
10 - powerpc/8xx: Use SPRG2 instead of DAR for saving r3
11 - powerpc/8xx: Add support for TASK_SIZE greater than 0x8000

All changes have been successfully tested on MPC885

Signed-off-by: Christophe Leroy 
Tested-by: Christophe Leroy 

---
 arch/powerpc/include/asm/page.h  |  8 +++
 arch/powerpc/include/asm/pgtable-ppc32.h | 37 +---
 arch/powerpc/include/asm/pgtable.h   | 17 ---
 arch/powerpc/include/asm/pte-8xx.h   |  1 +
 arch/powerpc/include/asm/pte-common.h|  3 +++
 arch/powerpc/kernel/head_8xx.S   |  3 ---
 arch/powerpc/mm/pgtable_32.c | 14 
 7 files changed, 56 insertions(+), 27 deletions(-)
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 01/11] powerpc/8xx: remove remaining unnecessary code in FixupDAR

2015-04-19 Thread Christophe Leroy
Since commit 33fb845a6f01 ("powerpc/8xx: Don't use MD_TWC for walk"), MD_EPN and
MD_TWC are not written anymore in FixupDAR so saving r3 has become useless.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S | 6 --
 1 file changed, 6 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 3d4b8ee..79b8a23 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -532,9 +532,6 @@ DARFixed:/* Return from dcbx instruction bug workaround */
  /* define if you don't want to use self modifying code */
 #define NO_SELF_MODIFYING_CODE
 FixupDAR:/* Entry point for dcbx workaround. */
-#ifdef CONFIG_8xx_CPU6
-   mtspr   SPRN_DAR, r3
-#endif
mtspr   SPRN_SPRG_SCRATCH2, r10
/* fetch instruction from memory. */
mfspr   r10, SPRN_SRR0
@@ -551,9 +548,6 @@ FixupDAR:/* Entry point for dcbx workaround. */
/* Extract level 2 index */
rlwinm  r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
lwzxr11, r10, r11   /* Get the pte */
-#ifdef CONFIG_8xx_CPU6
-   mfspr   r3, SPRN_DAR
-#endif
/* concat physical page address(r11) and page offset(r10) */
mfspr   r10, SPRN_SRR0
rlwimi  r11, r10, 0, 32 - PAGE_SHIFT, 31
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 02/11] powerpc/8xx: remove tests on PGDIR entry validity

2015-04-19 Thread Christophe Leroy
Kernel MMU handling code handles validity of entries via _PMD_PRESENT which
corresponds to V bit in MD_TWC and MI_TWC. When the V bit is not set, MPC8xx
triggers TLBError exception. So we don't have to check that and branch ourselves
to TLBError. We can set TLB entries with non present entries, remove all those
tests and let the 8xx handle it. This reduces the number of cycles when the
entries are valid, which is the case most of the time, and doesn't significantly
increase the time for handling invalid entries.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S | 41 -
 1 file changed, 8 insertions(+), 33 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 79b8a23..2c329f1 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -329,12 +329,9 @@ InstructionTLBMiss:
/* Extract level 1 index */
rlwinm  r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
lwzxr11, r10, r11   /* Get the level 1 entry */
-   rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
-   beq 2f  /* If zero, don't try to find a pte */
+   rlwinm  r10, r11,0,0,19 /* Extract page descriptor page address */
 
-   /* We have a pte table, so load the MI_TWC with the attributes
-* for this "segment."
-*/
+   /* Load the MI_TWC with the attributes for this "segment." */
MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */
mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
/* Extract level 2 index */
@@ -342,13 +339,11 @@ InstructionTLBMiss:
lwzxr10, r10, r11   /* Get the pte */
 
 #ifdef CONFIG_SWAP
-   andi.   r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT
-   cmpwi   cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT
-   li  r11, RPN_PATTERN
-   bne-cr0, 2f
-#else
-   li  r11, RPN_PATTERN
+   rlwinm  r11, r10, 32-5, _PAGE_PRESENT
+   and r11, r11, r10
+   rlwimi  r10, r11, 0, _PAGE_PRESENT
 #endif
+   li  r11, RPN_PATTERN
/* The Linux PTE won't go exactly into the MMU TLB.
 * Software indicator bits 21 and 28 must be clear.
 * Software indicator bits 24, 25, 26, and 27 must be
@@ -366,21 +361,6 @@ InstructionTLBMiss:
mfspr   r10, SPRN_SPRG_SCRATCH2
EXCEPTION_EPILOG_0
rfi
-2:
-   mfspr   r10, SPRN_SRR1
-   /* clear all error bits as TLB Miss
-* sets a few unconditionally
-   */
-   rlwinm  r10, r10, 0, 0x
-   mtspr   SPRN_SRR1, r10
-
-   /* Restore registers */
-#ifdef CONFIG_8xx_CPU6
-   mfspr   r3, SPRN_DAR
-   mtspr   SPRN_DAR, r11   /* Tag DAR */
-#endif
-   mfspr   r10, SPRN_SPRG_SCRATCH2
-   b   InstructionTLBError1
 
. = 0x1200
 DataStoreTLBMiss:
@@ -403,8 +383,6 @@ DataStoreTLBMiss:
/* Extract level 1 index */
rlwinm  r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
lwzxr11, r10, r11   /* Get the level 1 entry */
-   rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
-   beq 2f  /* If zero, don't try to find a pte */
 
/* We have a pte table, so load fetch the pte from the table.
 */
@@ -450,7 +428,7 @@ DataStoreTLBMiss:
 * set.  All other Linux PTE bits control the behavior
 * of the MMU.
 */
-2: li  r11, RPN_PATTERN
+   li  r11, RPN_PATTERN
rlwimi  r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
MTSPR_CPU6(SPRN_MD_RPN, r10, r3)/* Update TLB entry */
 
@@ -469,10 +447,7 @@ DataStoreTLBMiss:
  */
. = 0x1300
 InstructionTLBError:
-   EXCEPTION_PROLOG_0
-InstructionTLBError1:
-   EXCEPTION_PROLOG_1
-   EXCEPTION_PROLOG_2
+   EXCEPTION_PROLOG
mr  r4,r12
mr  r5,r9
andis.  r10,r5,0x4000
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 03/11] powerpc32: Use kmem_cache memory for PGDIR

2015-04-19 Thread Christophe Leroy
When pages are not 4K, the PGDIR table is allocated with kmalloc(). In order to
optimise TLB handlers, aligned memory is needed. kmalloc() doesn't provide
aligned memory blocks, so let's use a kmem_cache pool instead.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/include/asm/pgtable-ppc32.h |  4 
 arch/powerpc/mm/pgtable_32.c | 16 ++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h 
b/arch/powerpc/include/asm/pgtable-ppc32.h
index 543bb8e..d323e8b 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -346,10 +346,14 @@ static inline void __ptep_set_access_flags(pte_t *ptep, 
pte_t entry)
 #define pte_to_pgoff(pte)  (pte_val(pte) >> 3)
 #define pgoff_to_pte(off)  ((pte_t) { ((off) << 3) | _PAGE_FILE })
 
+#ifndef CONFIG_PPC_4K_PAGES
+void pgtable_cache_init(void);
+#else
 /*
  * No page table caches to initialise
  */
 #define pgtable_cache_init()   do { } while (0)
+#endif
 
 extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
  pmd_t **pmdp);
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index cf11342..730dc2d 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -73,13 +73,25 @@ extern unsigned long p_mapped_by_tlbcam(phys_addr_t pa);
 
 #define PGDIR_ORDER(32 + PGD_T_LOG2 - PGDIR_SHIFT)
 
+#ifndef CONFIG_PPC_4K_PAGES
+static struct kmem_cache *pgtable_cache;
+
+void pgtable_cache_init(void)
+{
+   pgtable_cache = kmem_cache_create("PGDIR cache", 1 << PGDIR_ORDER,
+ 1 << PGDIR_ORDER, 0, NULL);
+   if (pgtable_cache == NULL)
+   panic("Couldn't allocate pgtable caches");
+}
+#endif
+
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
pgd_t *ret;
 
/* pgdir take page or two with 4K pages and a page fraction otherwise */
 #ifndef CONFIG_PPC_4K_PAGES
-   ret = kzalloc(1 << PGDIR_ORDER, GFP_KERNEL);
+   ret = kmem_cache_alloc(pgtable_cache, GFP_KERNEL | __GFP_ZERO);
 #else
ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
PGDIR_ORDER - PAGE_SHIFT);
@@ -90,7 +102,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
 #ifndef CONFIG_PPC_4K_PAGES
-   kfree((void *)pgd);
+   kmem_cache_free(pgtable_cache, (void *)pgd);
 #else
free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT);
 #endif
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 04/11] powerpc/8xx: Take benefit of aligned PGDIR

2015-04-19 Thread Christophe Leroy
L1 base address is now aligned so we can insert L1 index into r11 directly and
then preserve r10

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S | 34 +++---
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 2c329f1..ae05f28 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -326,16 +326,15 @@ InstructionTLBMiss:
ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
 3:
 #endif
-   /* Extract level 1 index */
-   rlwinm  r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
-   lwzxr11, r10, r11   /* Get the level 1 entry */
-   rlwinm  r10, r11,0,0,19 /* Extract page descriptor page address */
+   /* Insert level 1 index */
+   rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
+   lwz r11, 0(r11) /* Get the level 1 entry */
 
/* Load the MI_TWC with the attributes for this "segment." */
MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */
-   mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
+   rlwinm  r11, r11,0,0,19 /* Extract page descriptor page address */
/* Extract level 2 index */
-   rlwinm  r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
+   rlwinm  r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
lwzxr10, r10, r11   /* Get the pte */
 
 #ifdef CONFIG_SWAP
@@ -380,13 +379,12 @@ DataStoreTLBMiss:
lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
 3:
-   /* Extract level 1 index */
-   rlwinm  r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
-   lwzxr11, r10, r11   /* Get the level 1 entry */
+   /* Insert level 1 index */
+   rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
+   lwz r11, 0(r11) /* Get the level 1 entry */
 
/* We have a pte table, so load fetch the pte from the table.
 */
-   mfspr   r10, SPRN_MD_EPN/* Get address of fault */
/* Extract level 2 index */
rlwinm  r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
rlwimi  r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
@@ -515,16 +513,14 @@ FixupDAR:/* Entry point for dcbx workaround. */
beq-3f  /* Branch if user space */
lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
-   /* Extract level 1 index */
-3: rlwinm  r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
-   lwzxr11, r10, r11   /* Get the level 1 entry */
-   rlwinm  r10, r11,0,0,19 /* Extract page descriptor page address */
-   mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
-   /* Extract level 2 index */
-   rlwinm  r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
-   lwzxr11, r10, r11   /* Get the pte */
+   /* Insert level 1 index */
+3: rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
+   lwz r11, 0(r11) /* Get the level 1 entry */
+   rlwinm  r11, r11,0,0,19 /* Extract page descriptor page address */
+   /* Insert level 2 index */
+   rlwimi  r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
+   lwz r11, 0(r11) /* Get the pte */
/* concat physical page address(r11) and page offset(r10) */
-   mfspr   r10, SPRN_SRR0
rlwimi  r11, r10, 0, 32 - PAGE_SHIFT, 31
lwz r11,0(r11)
 /* Check if it really is a dcbx instruction. */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 05/11] powerpc/8xx: Optimise access to swapper_pg_dir

2015-04-19 Thread Christophe Leroy
All accesses to PGD entries are done via 0(r11).
By using the lower part of swapper_pg_dir as the load displacement from r11, we
can remove the ori instruction.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index ae05f28..aa45225 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -322,13 +322,12 @@ InstructionTLBMiss:
mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
 #ifdef CONFIG_MODULES
beq 3f
-   lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
-   ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
+   lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
 #endif
/* Insert level 1 index */
rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
-   lwz r11, 0(r11) /* Get the level 1 entry */
+   lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the 
level 1 entry */
 
/* Load the MI_TWC with the attributes for this "segment." */
MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */
@@ -376,12 +375,11 @@ DataStoreTLBMiss:
andis.  r11, r10, 0x8000
mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
beq 3f
-   lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
-   ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
+   lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
/* Insert level 1 index */
rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
-   lwz r11, 0(r11) /* Get the level 1 entry */
+   lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the 
level 1 entry */
 
/* We have a pte table, so load fetch the pte from the table.
 */
@@ -510,12 +508,11 @@ FixupDAR:/* Entry point for dcbx workaround. */
mfspr   r10, SPRN_SRR0
andis.  r11, r10, 0x8000/* Address >= 0x8000 */
mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
-   beq-3f  /* Branch if user space */
-   lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
-   ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
+   beq 3f
+   lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
/* Insert level 1 index */
 3: rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
-   lwz r11, 0(r11) /* Get the level 1 entry */
+   lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the 
level 1 entry */
rlwinm  r11, r11,0,0,19 /* Extract page descriptor page address */
/* Insert level 2 index */
rlwimi  r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
@@ -670,8 +667,7 @@ start_here:
 * init's THREAD like the context switch code does, but this is
 * easier..until someone changes init's static structures.
 */
-   lis r6, swapper_pg_dir@h
-   ori r6, r6, swapper_pg_dir@l
+   lis r6, swapper_pg_dir@ha
tophys(r6,r6)
 #ifdef CONFIG_8xx_CPU6
lis r4, cpu6_errata_word@h
@@ -850,6 +846,8 @@ _GLOBAL(set_context)
stw r4, 0x4(r5)
 #endif
 
+   li  r5, (swapper_pg_dir-PAGE_OFFSET)@l
+   sub r4, r4, r5
 #ifdef CONFIG_8xx_CPU6
lis r6, cpu6_errata_word@h
ori r6, r6, cpu6_errata_word@l
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 06/11] powerpc/8xx: Remove duplicated code in set_context()

2015-04-19 Thread Christophe Leroy
Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index aa45225..b227902e 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -848,23 +848,21 @@ _GLOBAL(set_context)
 
li  r5, (swapper_pg_dir-PAGE_OFFSET)@l
sub r4, r4, r5
+   tophys  (r4, r4)
 #ifdef CONFIG_8xx_CPU6
lis r6, cpu6_errata_word@h
ori r6, r6, cpu6_errata_word@l
-   tophys  (r4, r4)
li  r7, 0x3f80
stw r7, 12(r6)
lwz r7, 12(r6)
+#endif
 mtspr   SPRN_M_TW, r4   /* Update MMU base address */
+#ifdef CONFIG_8xx_CPU6
li  r7, 0x3380
stw r7, 12(r6)
lwz r7, 12(r6)
-mtspr   SPRN_M_CASID, r3 /* Update context */
-#else
-mtspr   SPRN_M_CASID,r3/* Update context */
-   tophys  (r4, r4)
-   mtspr   SPRN_M_TW, r4   /* and pgd */
 #endif
+mtspr   SPRN_M_CASID, r3 /* Update context */
SYNC
blr
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 07/11] powerpc/8xx: macro for handling CPU15 errata

2015-04-19 Thread Christophe Leroy
Having a macro will help keep the code clear.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index b227902e..b3f3cb5 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -297,6 +297,17 @@ SystemCall:
  * We have to use the MD_xxx registers for the tablewalk because the
  * equivalent MI_xxx registers only perform the attribute functions.
  */
+
+#ifdef CONFIG_8xx_CPU15 /* CPU15 erratum: tlbie the page above AND the page below addr */
+#define DO_8xx_CPU15(tmp, addr)        \
+   addi    tmp, addr, PAGE_SIZE;   \
+   tlbie   tmp;                    \
+   addi    tmp, addr, -PAGE_SIZE;  \
+   tlbie   tmp
+#else
+#define DO_8xx_CPU15(tmp, addr)
+#endif
+
 InstructionTLBMiss:
 #ifdef CONFIG_8xx_CPU6
mtspr   SPRN_DAR, r3
@@ -304,12 +315,7 @@ InstructionTLBMiss:
EXCEPTION_PROLOG_0
mtspr   SPRN_SPRG_SCRATCH2, r10
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
-#ifdef CONFIG_8xx_CPU15
-   addir11, r10, PAGE_SIZE
-   tlbie   r11
-   addir11, r10, -PAGE_SIZE
-   tlbie   r11
-#endif
+   DO_8xx_CPU15(r11, r10)
 
/* If we are faulting a kernel address, we have to use the
 * kernel page tables.
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 08/11] powerpc/8xx: Handle CR out of exception PROLOG/EPILOG

2015-04-19 Thread Christophe Leroy
In order to be able to reduce scope during which CR is saved, we take
CR saving/restoring out of exception PROLOG and EPILOG

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index b3f3cb5..c89aed9 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -116,13 +116,13 @@ turn_on_mmu:
  */
 #define EXCEPTION_PROLOG   \
EXCEPTION_PROLOG_0; \
+   mfcrr10;\
EXCEPTION_PROLOG_1; \
EXCEPTION_PROLOG_2
 
 #define EXCEPTION_PROLOG_0 \
mtspr   SPRN_SPRG_SCRATCH0,r10; \
-   mtspr   SPRN_SPRG_SCRATCH1,r11; \
-   mfcrr10
+   mtspr   SPRN_SPRG_SCRATCH1,r11
 
 #define EXCEPTION_PROLOG_1 \
mfspr   r11,SPRN_SRR1;  /* check whether user or kernel */ \
@@ -162,7 +162,6 @@ turn_on_mmu:
  * Exception exit code.
  */
 #define EXCEPTION_EPILOG_0 \
-   mtcrr10;\
mfspr   r10,SPRN_SPRG_SCRATCH0; \
mfspr   r11,SPRN_SPRG_SCRATCH1
 
@@ -313,6 +312,7 @@ InstructionTLBMiss:
mtspr   SPRN_DAR, r3
 #endif
EXCEPTION_PROLOG_0
+   mfcrr10
mtspr   SPRN_SPRG_SCRATCH2, r10
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
DO_8xx_CPU15(r11, r10)
@@ -363,6 +363,7 @@ InstructionTLBMiss:
mtspr   SPRN_DAR, r11   /* Tag DAR */
 #endif
mfspr   r10, SPRN_SPRG_SCRATCH2
+   mtcrr10
EXCEPTION_EPILOG_0
rfi
 
@@ -372,6 +373,7 @@ DataStoreTLBMiss:
mtspr   SPRN_DAR, r3
 #endif
EXCEPTION_PROLOG_0
+   mfcrr10
mtspr   SPRN_SPRG_SCRATCH2, r10
mfspr   r10, SPRN_MD_EPN
 
@@ -440,6 +442,7 @@ DataStoreTLBMiss:
 #endif
mtspr   SPRN_DAR, r11   /* Tag DAR */
mfspr   r10, SPRN_SPRG_SCRATCH2
+   mtcrr10
EXCEPTION_EPILOG_0
rfi
 
@@ -465,6 +468,7 @@ InstructionTLBError:
. = 0x1400
 DataTLBError:
EXCEPTION_PROLOG_0
+   mfcrr10
 
mfspr   r11, SPRN_DAR
cmpwi   cr0, r11, RPN_PATTERN
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 09/11] powerpc/8xx: dont save CR in SCRATCH registers

2015-04-19 Thread Christophe Leroy
CR only needs to be preserved when checking if we are handling a kernel address.
So we can preserve CR in a register:
- In ITLBMiss, check is done only when CONFIG_MODULES is defined. Otherwise we
don't need to do anything at all with CR.
- If CONFIG_8xx_CPU6 is defined, we have r3 available for saving CR
- Otherwise, we use r10, then we reload SRR0/MD_EPN into r10 when CR is restored

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S | 53 +-
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index c89aed9..a073918 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -308,14 +308,10 @@ SystemCall:
 #endif
 
 InstructionTLBMiss:
+   EXCEPTION_PROLOG_0
 #ifdef CONFIG_8xx_CPU6
mtspr   SPRN_DAR, r3
 #endif
-   EXCEPTION_PROLOG_0
-   mfcrr10
-   mtspr   SPRN_SPRG_SCRATCH2, r10
-   mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
-   DO_8xx_CPU15(r11, r10)
 
/* If we are faulting a kernel address, we have to use the
 * kernel page tables.
@@ -323,14 +319,33 @@ InstructionTLBMiss:
 #ifdef CONFIG_MODULES
/* Only modules will cause ITLB Misses as we always
 * pin the first 8MB of kernel memory */
+#ifdef CONFIG_8xx_CPU6
+   mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
+   DO_8xx_CPU15(r11, r10)
+   mfcrr3
andis.  r11, r10, 0x8000/* Address >= 0x8000 */
+#else
+   mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
+   DO_8xx_CPU15(r10, r11)
+   mfcrr10
+   andis.  r11, r11, 0x8000/* Address >= 0x8000 */
 #endif
mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
-#ifdef CONFIG_MODULES
beq 3f
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
+#ifdef CONFIG_8xx_CPU6
+   mtcrr3
+#else
+   mtcrr10
+   mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
 #endif
+#else /* CONFIG_MODULES */
+   mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
+   DO_8xx_CPU15(r11, r10)
+   mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
+#endif /* CONFIG_MODULES */
+
/* Insert level 1 index */
rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the 
level 1 entry */
@@ -362,29 +377,37 @@ InstructionTLBMiss:
mfspr   r3, SPRN_DAR
mtspr   SPRN_DAR, r11   /* Tag DAR */
 #endif
-   mfspr   r10, SPRN_SPRG_SCRATCH2
-   mtcrr10
EXCEPTION_EPILOG_0
rfi
 
. = 0x1200
 DataStoreTLBMiss:
-#ifdef CONFIG_8xx_CPU6
-   mtspr   SPRN_DAR, r3
-#endif
EXCEPTION_PROLOG_0
-   mfcrr10
-   mtspr   SPRN_SPRG_SCRATCH2, r10
-   mfspr   r10, SPRN_MD_EPN
 
/* If we are faulting a kernel address, we have to use the
 * kernel page tables.
 */
+#ifdef CONFIG_8xx_CPU6
+   mtspr   SPRN_DAR, r3
+   mfcrr3
+   mfspr   r10, SPRN_MD_EPN
andis.  r11, r10, 0x8000
+#else
+   mfcrr10
+   mfspr   r11, SPRN_MD_EPN
+   andis.  r11, r11, 0x8000
+#endif
mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
beq 3f
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
+#ifdef CONFIG_8xx_CPU6
+   mtcrr3
+#else
+   mtcrr10
+   mfspr   r10, SPRN_MD_EPN
+#endif
+
/* Insert level 1 index */
rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the 
level 1 entry */
@@ -441,8 +464,6 @@ DataStoreTLBMiss:
mfspr   r3, SPRN_DAR
 #endif
mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r10, SPRN_SPRG_SCRATCH2
-   mtcrr10
EXCEPTION_EPILOG_0
rfi
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 10/11] powerpc/8xx: Use SPRG2 instead of DAR for saving r3

2015-04-19 Thread Christophe Leroy
We now have SPRG2 available as it is not used anymore for saving CR, so we don't
need to clobber DAR anymore for saving r3 for CPU6 ERRATA handling.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index a073918..dbe110e 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -310,7 +310,7 @@ SystemCall:
 InstructionTLBMiss:
EXCEPTION_PROLOG_0
 #ifdef CONFIG_8xx_CPU6
-   mtspr   SPRN_DAR, r3
+   mtspr   SPRN_SPRG_SCRATCH2, r3
 #endif
 
/* If we are faulting a kernel address, we have to use the
@@ -374,8 +374,7 @@ InstructionTLBMiss:
 
/* Restore registers */
 #ifdef CONFIG_8xx_CPU6
-   mfspr   r3, SPRN_DAR
-   mtspr   SPRN_DAR, r11   /* Tag DAR */
+   mfspr   r3, SPRN_SPRG_SCRATCH2
 #endif
EXCEPTION_EPILOG_0
rfi
@@ -388,7 +387,7 @@ DataStoreTLBMiss:
 * kernel page tables.
 */
 #ifdef CONFIG_8xx_CPU6
-   mtspr   SPRN_DAR, r3
+   mtspr   SPRN_SPRG_SCRATCH2, r3
mfcrr3
mfspr   r10, SPRN_MD_EPN
andis.  r11, r10, 0x8000
@@ -461,7 +460,7 @@ DataStoreTLBMiss:
 
/* Restore registers */
 #ifdef CONFIG_8xx_CPU6
-   mfspr   r3, SPRN_DAR
+   mfspr   r3, SPRN_SPRG_SCRATCH2
 #endif
mtspr   SPRN_DAR, r11   /* Tag DAR */
EXCEPTION_EPILOG_0
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 11/11] powerpc/8xx: Add support for TASK_SIZE greater than 0x80000000

2015-04-19 Thread Christophe Leroy
By default, TASK_SIZE is set to 0x80000000 for PPC_8xx, which is most likely
sufficient for most cases. However, kernel configuration allows setting TASK_SIZE
to another value, so the 8xx shall handle it.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S | 29 +
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index dbe110e..d380658 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -48,6 +48,19 @@
mtspr   spr, reg
 #endif
 
+/* Test if addr is a kernel address; sets cr0 for BRANCH_UNLESS_KERNEL, clobbers tmp */
+#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
+#define IS_KERNEL(tmp, addr)   \
+   andis.  tmp, addr, 0x8000   /* Address >= 0x80000000 (top bit set) */
+#define BRANCH_UNLESS_KERNEL(label)    beq label
+#else
+#define IS_KERNEL(tmp, addr)   \
+   rlwinm  tmp, addr, 16, 16, 31;  /* tmp = upper halfword of addr */ \
+   cmpli   cr0, tmp, PAGE_OFFSET >> 16
+#define BRANCH_UNLESS_KERNEL(label)    blt label
+#endif
+
+
 /*
  * Value for the bits that have fixed value in RPN entries.
  * Also used for tagging DAR for DTLBerror.
@@ -323,15 +336,15 @@ InstructionTLBMiss:
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
DO_8xx_CPU15(r11, r10)
mfcrr3
-   andis.  r11, r10, 0x8000/* Address >= 0x8000 */
+   IS_KERNEL(r11, r10)
 #else
mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
DO_8xx_CPU15(r10, r11)
mfcrr10
-   andis.  r11, r11, 0x8000/* Address >= 0x8000 */
+   IS_KERNEL(r11, r11)
 #endif
mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
-   beq 3f
+   BRANCH_UNLESS_KERNEL(3f)
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
 #ifdef CONFIG_8xx_CPU6
@@ -390,14 +403,14 @@ DataStoreTLBMiss:
mtspr   SPRN_SPRG_SCRATCH2, r3
mfcrr3
mfspr   r10, SPRN_MD_EPN
-   andis.  r11, r10, 0x8000
+   IS_KERNEL(r11, r10)
 #else
mfcrr10
mfspr   r11, SPRN_MD_EPN
-   andis.  r11, r11, 0x8000
+   IS_KERNEL(r11, r11)
 #endif
mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
-   beq 3f
+   BRANCH_UNLESS_KERNEL(3f)
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
 #ifdef CONFIG_8xx_CPU6
@@ -536,9 +549,9 @@ FixupDAR:/* Entry point for dcbx workaround. */
mtspr   SPRN_SPRG_SCRATCH2, r10
/* fetch instruction from memory. */
mfspr   r10, SPRN_SRR0
-   andis.  r11, r10, 0x8000/* Address >= 0x8000 */
+   IS_KERNEL(r11, r10)
mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
-   beq 3f
+   BRANCH_UNLESS_KERNEL(3f)
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
/* Insert level 1 index */
 3: rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH RESEND v5 0/5] powerpc8xx: Further optimisation of TLB handling

2015-04-19 Thread Christophe Leroy
This patchset provides a further optimisation of TLB handling in the 8xx.
Changes are:
- Not saving registers like CR when not needed
- Adding support for any TASK_SIZE

Only the last patch of the set is changed compared to v4

Resending with proper From: this time.

Christophe Leroy (5):
  powerpc/8xx: macro for handling CPU15 errata
  powerpc/8xx: Handle CR out of exception PROLOG/EPILOG
  powerpc/8xx: dont save CR in SCRATCH registers
  powerpc/8xx: Use SPRG2 instead of DAR for saving r3
  powerpc/8xx: Add support for TASK_SIZE greater than 0x80000000

 arch/powerpc/kernel/head_8xx.S | 79 +++---
 1 file changed, 51 insertions(+), 28 deletions(-)

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH RESEND v5 1/5] powerpc/8xx: macro for handling CPU15 errata

2015-04-19 Thread Christophe Leroy
Having a macro will help keep the code clear.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_8xx.S | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 9b53fe1..1279018 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -297,6 +297,17 @@ SystemCall:
  * We have to use the MD_xxx registers for the tablewalk because the
  * equivalent MI_xxx registers only perform the attribute functions.
  */
+
+#ifdef CONFIG_8xx_CPU15
+#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr) \
+   additmp, addr, PAGE_SIZE;   \
+   tlbie   tmp;\
+   additmp, addr, -PAGE_SIZE;  \
+   tlbie   tmp
+#else
+#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr)
+#endif
+
 InstructionTLBMiss:
 #ifdef CONFIG_8xx_CPU6
mtspr   SPRN_DAR, r3
@@ -304,12 +315,7 @@ InstructionTLBMiss:
EXCEPTION_PROLOG_0
mtspr   SPRN_SPRG_SCRATCH2, r10
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
-#ifdef CONFIG_8xx_CPU15
-   addir11, r10, PAGE_SIZE
-   tlbie   r11
-   addir11, r10, -PAGE_SIZE
-   tlbie   r11
-#endif
+   INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
 
/* If we are faulting a kernel address, we have to use the
 * kernel page tables.
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH RESEND v5 2/5] powerpc/8xx: Handle CR out of exception PROLOG/EPILOG

2015-04-19 Thread Christophe Leroy
In order to be able to reduce scope during which CR is saved, we take
CR saving/restoring out of exception PROLOG and EPILOG

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_8xx.S | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 1279018..5a69c5e 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -116,13 +116,13 @@ turn_on_mmu:
  */
 #define EXCEPTION_PROLOG   \
EXCEPTION_PROLOG_0; \
+   mfcrr10;\
EXCEPTION_PROLOG_1; \
EXCEPTION_PROLOG_2
 
 #define EXCEPTION_PROLOG_0 \
mtspr   SPRN_SPRG_SCRATCH0,r10; \
-   mtspr   SPRN_SPRG_SCRATCH1,r11; \
-   mfcrr10
+   mtspr   SPRN_SPRG_SCRATCH1,r11
 
 #define EXCEPTION_PROLOG_1 \
mfspr   r11,SPRN_SRR1;  /* check whether user or kernel */ \
@@ -162,7 +162,6 @@ turn_on_mmu:
  * Exception exit code.
  */
 #define EXCEPTION_EPILOG_0 \
-   mtcrr10;\
mfspr   r10,SPRN_SPRG_SCRATCH0; \
mfspr   r11,SPRN_SPRG_SCRATCH1
 
@@ -313,6 +312,7 @@ InstructionTLBMiss:
mtspr   SPRN_DAR, r3
 #endif
EXCEPTION_PROLOG_0
+   mfcrr10
mtspr   SPRN_SPRG_SCRATCH2, r10
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
@@ -363,6 +363,7 @@ InstructionTLBMiss:
mtspr   SPRN_DAR, r11   /* Tag DAR */
 #endif
mfspr   r10, SPRN_SPRG_SCRATCH2
+   mtcrr10
EXCEPTION_EPILOG_0
rfi
 
@@ -372,6 +373,7 @@ DataStoreTLBMiss:
mtspr   SPRN_DAR, r3
 #endif
EXCEPTION_PROLOG_0
+   mfcrr10
mtspr   SPRN_SPRG_SCRATCH2, r10
mfspr   r10, SPRN_MD_EPN
 
@@ -437,6 +439,7 @@ DataStoreTLBMiss:
 #endif
mtspr   SPRN_DAR, r11   /* Tag DAR */
mfspr   r10, SPRN_SPRG_SCRATCH2
+   mtcrr10
EXCEPTION_EPILOG_0
rfi
 
@@ -462,6 +465,7 @@ InstructionTLBError:
. = 0x1400
 DataTLBError:
EXCEPTION_PROLOG_0
+   mfcrr10
 
mfspr   r11, SPRN_DAR
cmpwi   cr0, r11, RPN_PATTERN
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH RESEND v5 3/5] powerpc/8xx: dont save CR in SCRATCH registers

2015-04-19 Thread Christophe Leroy
CR only needs to be preserved when checking if we are handling a kernel address.
So we can preserve CR in a register:
- In ITLBMiss, check is done only when CONFIG_MODULES is defined. Otherwise we
don't need to do anything at all with CR.
- We use r10, then we reload SRR0/MD_EPN into r10 when CR is restored

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_8xx.S | 29 +++--
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 5a69c5e..150d03f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -312,10 +312,6 @@ InstructionTLBMiss:
mtspr   SPRN_DAR, r3
 #endif
EXCEPTION_PROLOG_0
-   mfcrr10
-   mtspr   SPRN_SPRG_SCRATCH2, r10
-   mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
-   INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
 
/* If we are faulting a kernel address, we have to use the
 * kernel page tables.
@@ -323,13 +319,20 @@ InstructionTLBMiss:
 #ifdef CONFIG_MODULES
/* Only modules will cause ITLB Misses as we always
 * pin the first 8MB of kernel memory */
-   andis.  r11, r10, 0x8000/* Address >= 0x8000 */
-#endif
+   mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
+   INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
+   mfcrr10
+   andis.  r11, r11, 0x8000/* Address >= 0x8000 */
mfspr   r11, SPRN_M_TW  /* Get level 1 table */
-#ifdef CONFIG_MODULES
beq 3f
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
+   mtcrr10
+   mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
+#else
+   mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
+   INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
+   mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
 #endif
/* Insert level 1 index */
rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
@@ -362,8 +365,6 @@ InstructionTLBMiss:
mfspr   r3, SPRN_DAR
mtspr   SPRN_DAR, r11   /* Tag DAR */
 #endif
-   mfspr   r10, SPRN_SPRG_SCRATCH2
-   mtcrr10
EXCEPTION_EPILOG_0
rfi
 
@@ -374,17 +375,19 @@ DataStoreTLBMiss:
 #endif
EXCEPTION_PROLOG_0
mfcrr10
-   mtspr   SPRN_SPRG_SCRATCH2, r10
-   mfspr   r10, SPRN_MD_EPN
 
/* If we are faulting a kernel address, we have to use the
 * kernel page tables.
 */
-   andis.  r11, r10, 0x8000
+   mfspr   r11, SPRN_MD_EPN
+   andis.  r11, r11, 0x8000
mfspr   r11, SPRN_M_TW  /* Get level 1 table */
beq 3f
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
+   mtcrr10
+   mfspr   r10, SPRN_MD_EPN
+
/* Insert level 1 index */
rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the 
level 1 entry */
@@ -438,8 +441,6 @@ DataStoreTLBMiss:
mfspr   r3, SPRN_DAR
 #endif
mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r10, SPRN_SPRG_SCRATCH2
-   mtcrr10
EXCEPTION_EPILOG_0
rfi
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH RESEND v5 4/5] powerpc/8xx: Use SPRG2 instead of DAR for saving r3

2015-04-19 Thread Christophe Leroy
We now have SPRG2 available as it is not used anymore for saving CR, so we don't
need to clobber DAR anymore for saving r3 for CPU6 ERRATA handling.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_8xx.S | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 150d03f..ba2dc53 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -309,7 +309,7 @@ SystemCall:
 
 InstructionTLBMiss:
 #ifdef CONFIG_8xx_CPU6
-   mtspr   SPRN_DAR, r3
+   mtspr   SPRN_SPRG_SCRATCH2, r3
 #endif
EXCEPTION_PROLOG_0
 
@@ -362,8 +362,7 @@ InstructionTLBMiss:
 
/* Restore registers */
 #ifdef CONFIG_8xx_CPU6
-   mfspr   r3, SPRN_DAR
-   mtspr   SPRN_DAR, r11   /* Tag DAR */
+   mfspr   r3, SPRN_SPRG_SCRATCH2
 #endif
EXCEPTION_EPILOG_0
rfi
@@ -371,7 +370,7 @@ InstructionTLBMiss:
. = 0x1200
 DataStoreTLBMiss:
 #ifdef CONFIG_8xx_CPU6
-   mtspr   SPRN_DAR, r3
+   mtspr   SPRN_SPRG_SCRATCH2, r3
 #endif
EXCEPTION_PROLOG_0
mfcrr10
@@ -438,7 +437,7 @@ DataStoreTLBMiss:
 
/* Restore registers */
 #ifdef CONFIG_8xx_CPU6
-   mfspr   r3, SPRN_DAR
+   mfspr   r3, SPRN_SPRG_SCRATCH2
 #endif
mtspr   SPRN_DAR, r11   /* Tag DAR */
EXCEPTION_EPILOG_0
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH RESEND v5 5/5] powerpc/8xx: Add support for TASK_SIZE greater than 0x80000000

2015-04-19 Thread Christophe Leroy
By default, TASK_SIZE is set to 0x80000000 for PPC_8xx, which is most
likely sufficient for most cases. However, kernel configuration allows
to set TASK_SIZE to another value, so the 8xx shall handle it.

This patch also takes into account the case of PAGE_OFFSET lower than
0x80000000, although most of the time it is equal to 0xC0000000

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/head_8xx.S | 25 +++--
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index ba2dc53..c640bbb 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -48,6 +48,19 @@
mtspr   spr, reg
 #endif
 
+/* Macro to test if an address is a kernel address */
+#if CONFIG_TASK_SIZE <= 0x8000 && CONFIG_PAGE_OFFSET >= 0x8000
+#define IS_KERNEL(tmp, addr)   \
+   andis.  tmp, addr, 0x8000   /* Address >= 0x8000 */
+#define BRANCH_UNLESS_KERNEL(label)beq label
+#else
+#define IS_KERNEL(tmp, addr)   \
+   rlwinm  tmp, addr, 16, 16, 31;  \
+   cmpli   cr0, tmp, PAGE_OFFSET >> 16
+#define BRANCH_UNLESS_KERNEL(label)blt label
+#endif
+
+
 /*
  * Value for the bits that have fixed value in RPN entries.
  * Also used for tagging DAR for DTLBerror.
@@ -322,9 +335,9 @@ InstructionTLBMiss:
mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11)
mfcrr10
-   andis.  r11, r11, 0x8000/* Address >= 0x8000 */
+   IS_KERNEL(r11, r11)
mfspr   r11, SPRN_M_TW  /* Get level 1 table */
-   beq 3f
+   BRANCH_UNLESS_KERNEL(3f)
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
mtcrr10
@@ -379,9 +392,9 @@ DataStoreTLBMiss:
 * kernel page tables.
 */
mfspr   r11, SPRN_MD_EPN
-   andis.  r11, r11, 0x8000
+   IS_KERNEL(r11, r11)
mfspr   r11, SPRN_M_TW  /* Get level 1 table */
-   beq 3f
+   BRANCH_UNLESS_KERNEL(3f)
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
 3:
mtcrr10
@@ -513,9 +526,9 @@ FixupDAR:/* Entry point for dcbx workaround. */
mtspr   SPRN_SPRG_SCRATCH2, r10
/* fetch instruction from memory. */
mfspr   r10, SPRN_SRR0
-   andis.  r11, r10, 0x8000/* Address >= 0x8000 */
+   IS_KERNEL(r11, r10)
mfspr   r11, SPRN_M_TW  /* Get level 1 table */
-   beq 3f
+   BRANCH_UNLESS_KERNEL(3f)
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
/* Insert level 1 index */
 3: rlwimi  r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/3] powerpc/8xx: mark _PAGE_SHARED all types of kernel pages

2015-04-22 Thread Christophe Leroy
All kernel pages have to be marked as shared in order to not perform
CASID verification.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/pte-8xx.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/pte-8xx.h 
b/arch/powerpc/include/asm/pte-8xx.h
index eb6edb4..c8aacad 100644
--- a/arch/powerpc/include/asm/pte-8xx.h
+++ b/arch/powerpc/include/asm/pte-8xx.h
@@ -63,7 +63,12 @@
 
 /* We need to add _PAGE_SHARED to kernel pages */
 #define _PAGE_KERNEL_RO(_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO)
-#define _PAGE_KERNEL_ROX   (_PAGE_EXEC | _PAGE_RO | _PAGE_KNLRO)
+#define _PAGE_KERNEL_ROX   (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO | \
+_PAGE_EXEC)
+#define _PAGE_KERNEL_RW(_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW 
| \
+_PAGE_HWWRITE)
+#define _PAGE_KERNEL_RWX   (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
+_PAGE_HWWRITE | _PAGE_EXEC)
 
 #endif /* __KERNEL__ */
 #endif /*  _ASM_POWERPC_PTE_8xx_H */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/3] powerpc/8xx: Handle PAGE_USER via APG bits

2015-04-22 Thread Christophe Leroy
Use of APG for handling PAGE_USER.

All pages PP exec bits are set to either 000 or 011, which means
respectively RW for Supervisor and no access for User, or RO for
Supervisor and no access for user.

Then we use the APG to say whether accesses are according to
Page rules or "all Supervisor" rules (Access to all)

Therefore, we define 2 APG groups corresponding to _PAGE_USER.
Mx_AP are initialised as follows:
GP0 => No user => 01 (all accesses performed according
to page definition)
GP1 => User => 00 (all accesses performed as supervisor
according to page definition)

This removes the special 8xx handling in pte_update()

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/pgtable-ppc32.h | 19 ---
 arch/powerpc/include/asm/pte-8xx.h   | 27 +--
 arch/powerpc/kernel/head_8xx.S   | 21 -
 3 files changed, 21 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h 
b/arch/powerpc/include/asm/pgtable-ppc32.h
index b3e9a3e..d280fa2 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -170,24 +170,6 @@ static inline unsigned long pte_update(pte_t *p,
 #ifdef PTE_ATOMIC_UPDATES
unsigned long old, tmp;
 
-#ifdef CONFIG_PPC_8xx
-   unsigned long tmp2;
-
-   __asm__ __volatile__("\
-1: lwarx   %0,0,%4\n\
-   andc%1,%0,%5\n\
-   or  %1,%1,%6\n\
-   /* 0x200 == Extended encoding, bit 22 */ \
-   /* Bit 22 has to be 1 when _PAGE_USER is unset and _PAGE_RO is set */ \
-   rlwimi  %1,%1,32-1,0x200\n /* get _PAGE_RO */ \
-   rlwinm  %3,%1,32-2,0x200\n /* get _PAGE_USER */ \
-   andc%1,%1,%3\n\
-   stwcx.  %1,0,%4\n\
-   bne-1b"
-   : "=&r" (old), "=&r" (tmp), "=m" (*p), "=&r" (tmp2)
-   : "r" (p), "r" (clr), "r" (set), "m" (*p)
-   : "cc" );
-#else /* CONFIG_PPC_8xx */
__asm__ __volatile__("\
 1: lwarx   %0,0,%3\n\
andc%1,%0,%4\n\
@@ -198,7 +180,6 @@ static inline unsigned long pte_update(pte_t *p,
: "=&r" (old), "=&r" (tmp), "=m" (*p)
: "r" (p), "r" (clr), "r" (set), "m" (*p)
: "cc" );
-#endif /* CONFIG_PPC_8xx */
 #else /* PTE_ATOMIC_UPDATES */
unsigned long old = pte_val(*p);
*p = __pte((old & ~clr) | set);
diff --git a/arch/powerpc/include/asm/pte-8xx.h 
b/arch/powerpc/include/asm/pte-8xx.h
index c8aacad..7926d4e 100644
--- a/arch/powerpc/include/asm/pte-8xx.h
+++ b/arch/powerpc/include/asm/pte-8xx.h
@@ -35,36 +35,27 @@
 #define _PAGE_SPECIAL  0x0008  /* SW entry, forced to 0 by the TLB miss */
 #define _PAGE_DIRTY0x0100  /* C: page changed */
 
-/* These 4 software bits must be masked out when the entry is loaded
- * into the TLB, 1 SW bit left(0x0080).
+/* These 4 software bits must be masked out when the L2 entry is loaded
+ * into the TLB.
  */
-#define _PAGE_GUARDED  0x0010  /* software: guarded access */
-#define _PAGE_ACCESSED 0x0020  /* software: page referenced */
-#define _PAGE_WRITETHRU0x0040  /* software: caching is write through */
+#define _PAGE_GUARDED  0x0010  /* Copied to L1 G entry in DTLB */
+#define _PAGE_USER 0x0020  /* Copied to L1 APG lsb */
+#define _PAGE_ACCESSED 0x0040  /* software: page referenced */
+#define _PAGE_WRITETHRU0x0080  /* software: caching is write through */
 
-/* Setting any bits in the nibble with the follow two controls will
- * require a TLB exception handler change.  It is assumed unused bits
- * are always zero.
- */
-#define _PAGE_RO   0x0400  /* lsb PP bits */
-#define _PAGE_USER 0x0800  /* msb PP bits */
-/* set when _PAGE_USER is unset and _PAGE_RO is set */
-#define _PAGE_KNLRO0x0200
+#define _PAGE_RO   0x0600  /* Supervisor RO, User no access */
 
 #define _PMD_PRESENT   0x0001
 #define _PMD_BAD   0x0ff0
 #define _PMD_PAGE_MASK 0x000c
 #define _PMD_PAGE_8M   0x000c
 
-#define _PTE_NONE_MASK _PAGE_KNLRO
-
 /* Until my rework is finished, 8xx still needs atomic PTE updates */
 #define PTE_ATOMIC_UPDATES 1
 
 /* We need to add _PAGE_SHARED to kernel pages */
-#define _PAGE_KERNEL_RO(_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO)
-#define _PAGE_KERNEL_ROX   (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO | \
-_PAGE_EXEC)
+#define _PAGE_KERNEL_RO(_PAGE_SHARED | _PAGE_RO)
+#define _PAGE_KERNEL_ROX   (_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC)
 #define _PAGE_KERNEL_RW(_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW 
| \
 _PAGE_HWWRITE)
 #define _PAGE_KERNEL_RWX   (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
diff --git a/arch/powerpc/kernel/head_8

[PATCH 3/3] powerpc/8xx: Implementation of PAGE_EXEC

2015-04-22 Thread Christophe Leroy
This patch implements PAGE_EXEC capability on the 8xx.

All pages PP exec bits are set to 000, which means Execute for
Supervisor and no Execute for User.
Then we use the APG to say whether accesses are according to Page
rules, "all Supervisor" rules (Exec for all) and
"all User" rules (Exec for no one)

Therefore, we define 4 APG groups. msb is _PAGE_EXEC,
lsb is _PAGE_USER. MI_AP is initialised as follows:
GP0 (00) => Not User, no exec => 11 (all accesses performed as user)
GP1 (01) => User but no exec => 11 (all accesses performed as user)
GP2 (10) => Not User, exec => 01 (rights according to page definition)
GP3 (11) => User, exec => 00 (all accesses performed as supervisor)

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/cputable.h |  2 +-
 arch/powerpc/include/asm/mmu-8xx.h  | 26 ++
 arch/powerpc/include/asm/pte-8xx.h  |  3 ++-
 arch/powerpc/kernel/head_8xx.S  | 12 +---
 4 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/cputable.h 
b/arch/powerpc/include/asm/cputable.h
index daa5af9..c9aa2db 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -360,7 +360,7 @@ extern const char *powerpc_base_platform;
CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \
CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE)
 #define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB)
-#define CPU_FTRS_8XX   (CPU_FTR_USE_TB)
+#define CPU_FTRS_8XX   (CPU_FTR_USE_TB | CPU_FTR_NOEXECUTE)
 #define CPU_FTRS_40X   (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | 
CPU_FTR_NOEXECUTE)
 #define CPU_FTRS_44X   (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | 
CPU_FTR_NOEXECUTE)
 #define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | 
CPU_FTR_NOEXECUTE | \
diff --git a/arch/powerpc/include/asm/mmu-8xx.h 
b/arch/powerpc/include/asm/mmu-8xx.h
index d41200c..1407034 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -27,6 +27,19 @@
 #define MI_Ks  0x8000  /* Should not be set */
 #define MI_Kp  0x4000  /* Should always be set */
 
+/*
+ * All pages PP exec bits are set to 000, which means Execute for Supervisor
+ * and no Execute for User.
+ * Then we use the APG to say whether accesses are according to Page rules,
+ * "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone)
+ * Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER
+ * 0 (00) => Not User, no exec => 11 (all accesses performed as user)
+ * 1 (01) => User but no exec => 11 (all accesses performed as user)
+ * 2 (10) => Not User, exec => 01 (rights according to page definition)
+ * 3 (11) => User, exec => 00 (all accesses performed as supervisor)
+ */
+#define MI_APG_INIT0xf4ff
+
 /* The effective page number register.  When read, contains the information
  * about the last instruction TLB miss.  When MI_RPN is written, bits in
  * this register are used to create the TLB entry.
@@ -87,6 +100,19 @@
 #define MD_Ks  0x8000  /* Should not be set */
 #define MD_Kp  0x4000  /* Should always be set */
 
+/*
+ * All pages PP exec bits are set to either 000 or 011, which means 
respectively
+ * RW for Supervisor and no access for User, or RO for Supervisor and no access
+ * for user.
+ * Then we use the APG to say whether accesses are according to Page rules or
+ * "all Supervisor" rules (Access to all)
+ * Therefore, we define 2 APG groups. lsb is _PAGE_USER
+ * 0 => No user => 01 (all accesses performed according to page definition)
+ * 1 => User => 00 (all accesses performed as supervisor
+ * according to page definition)
+ */
+#define MD_APG_INIT0x4fff
+
 /* The effective page number register.  When read, contains the information
  * about the last instruction TLB miss.  When MD_RPN is written, bits in
  * this register are used to create the TLB entry.
diff --git a/arch/powerpc/include/asm/pte-8xx.h 
b/arch/powerpc/include/asm/pte-8xx.h
index 7926d4e..eae9b05 100644
--- a/arch/powerpc/include/asm/pte-8xx.h
+++ b/arch/powerpc/include/asm/pte-8xx.h
@@ -40,8 +40,9 @@
  */
 #define _PAGE_GUARDED  0x0010  /* Copied to L1 G entry in DTLB */
 #define _PAGE_USER 0x0020  /* Copied to L1 APG lsb */
-#define _PAGE_ACCESSED 0x0040  /* software: page referenced */
+#define _PAGE_EXEC 0x0040  /* Copied to L1 APG */
 #define _PAGE_WRITETHRU0x0080  /* software: caching is write through */
+#define _PAGE_ACCESSED 0x0800  /* software: page referenced */
 
 #define _PAGE_RO   0x0600  /* Supervisor RO, User no access */
 
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 508c645..29a5c1a 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -369,7 +369,7 @@ InstructionTLBMiss:

[PATCH 0/3] powerpc/8xx: Implementation of execute protection

2015-04-22 Thread Christophe Leroy
This patchset implements execute protection on the 8xx.
It also simplifies the handling of PAGE_USER and PAGE_RO,
and adds a small fix to the kernel pages definition.

This patchset goes on top of my previous patchset named
"[v5] powerpc8xx: Further optimisation of TLB handling"

Christophe Leroy (3):
  powerpc/8xx: mark _PAGE_SHARED all types of kernel pages
  powerpc/8xx: Handle PAGE_USER via APG bits
  powerpc/8xx: Implementation of PAGE_EXEC

 arch/powerpc/include/asm/cputable.h  |  2 +-
 arch/powerpc/include/asm/mmu-8xx.h   | 26 ++
 arch/powerpc/include/asm/pgtable-ppc32.h | 19 ---
 arch/powerpc/include/asm/pte-8xx.h   | 31 ++-
 arch/powerpc/kernel/head_8xx.S   | 31 ---
 5 files changed, 61 insertions(+), 48 deletions(-)

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2] spi: fsl-spi: use of_iomap() to map parameter ram on CPM1

2015-04-22 Thread Christophe Leroy
On CPM2, the SPI parameter RAM is dynamically allocated in the
dualport RAM whereas in CPM1, it is statically allocated to a default
address with capability to relocate it somewhere else via the use of
CPM micropatch. The address of the parameter RAM is given by the boot
loader and expected to be mapped via of_iomap()

In the current implementation, in function fsl_spi_cpm_get_pram()
there is a confusion between the SPI_BASE register and the base of the
SPI parameter RAM. Fortunately, it works properly with MPC866 and
MPC885 because they do set SPI_BASE, but on MPC860 and other old
MPC8xx that don't set SPI_BASE, pram_ofs is not properly set.
Also, the parameter RAM is not properly mapped with of_iomap() as it
should but still gets accessible by chance through the full RAM which
is mapped from somewhere else.

This patch applies to the SPI driver the same principle as for the
CPM UART: when the CPM is of type CPM1, we simply do an of_iomap() of
the area provided via the device tree.

Signed-off-by: Christophe Leroy 

---
 v2: Use devm_ioremap_resource() instead of of_iomap()

 drivers/spi/spi-fsl-cpm.c | 35 ++-
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c
index e85ab1c..4e5c945 100644
--- a/drivers/spi/spi-fsl-cpm.c
+++ b/drivers/spi/spi-fsl-cpm.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "spi-fsl-cpm.h"
 #include "spi-fsl-lib.h"
@@ -264,17 +265,6 @@ static unsigned long fsl_spi_cpm_get_pram(struct 
mpc8xxx_spi *mspi)
if (mspi->flags & SPI_CPM2) {
pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
out_be16(spi_base, pram_ofs);
-   } else {
-   struct spi_pram __iomem *pram = spi_base;
-   u16 rpbase = in_be16(&pram->rpbase);
-
-   /* Microcode relocation patch applied? */
-   if (rpbase) {
-   pram_ofs = rpbase;
-   } else {
-   pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
-   out_be16(spi_base, pram_ofs);
-   }
}
 
iounmap(spi_base);
@@ -287,7 +277,6 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
struct device_node *np = dev->of_node;
const u32 *iprop;
int size;
-   unsigned long pram_ofs;
unsigned long bds_ofs;
 
if (!(mspi->flags & SPI_CPM_MODE))
@@ -314,8 +303,21 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
}
}
 
-   pram_ofs = fsl_spi_cpm_get_pram(mspi);
-   if (IS_ERR_VALUE(pram_ofs)) {
+   if (mspi->flags & SPI_CPM1) {
+   struct resource *res;
+
+   res = platform_get_resource(to_platform_device(dev),
+   IORESOURCE_MEM, 1);
+   mspi->pram = devm_ioremap_resource(dev, res);
+   } else {
+   unsigned long pram_ofs = fsl_spi_cpm_get_pram(mspi);
+
+   if (IS_ERR_VALUE(pram_ofs))
+   mspi->pram = NULL;
+   else
+   mspi->pram = cpm_muram_addr(pram_ofs);
+   }
+   if (mspi->pram == NULL) {
dev_err(dev, "can't allocate spi parameter ram\n");
goto err_pram;
}
@@ -341,8 +343,6 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
goto err_dummy_rx;
}
 
-   mspi->pram = cpm_muram_addr(pram_ofs);
-
mspi->tx_bd = cpm_muram_addr(bds_ofs);
mspi->rx_bd = cpm_muram_addr(bds_ofs + sizeof(*mspi->tx_bd));
 
@@ -370,7 +370,8 @@ err_dummy_rx:
 err_dummy_tx:
cpm_muram_free(bds_ofs);
 err_bds:
-   cpm_muram_free(pram_ofs);
+   if (!(mspi->flags & SPI_CPM1))
+   cpm_muram_free(cpm_muram_offset(mspi->pram));
 err_pram:
fsl_spi_free_dummy_rx();
return -ENOMEM;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3] spi: fsl-spi: use devm_ioremap_resource() to map parameter ram on CPM1

2015-04-22 Thread Christophe Leroy
On CPM2, the SPI parameter RAM is dynamically allocated in the
dualport RAM whereas in CPM1, it is statically allocated to a default
address with capability to relocate it somewhere else via the use of
CPM micropatch. The address of the parameter RAM is given by the boot
loader and expected to be mapped via devm_ioremap_resource()

In the current implementation, in function fsl_spi_cpm_get_pram()
there is a confusion between the SPI_BASE register and the base of the
SPI parameter RAM. Fortunately, it works properly with MPC866 and
MPC885 because they do set SPI_BASE, but on MPC860 and other old
MPC8xx that don't set SPI_BASE, pram_ofs is not properly set.
Also, the parameter RAM is not properly mapped with
devm_ioremap_resource() as it should but still gets accessible by
chance through the full RAM which is mapped from somewhere else.

This patch applies to the SPI driver the same principle as for the
CPM UART: when the CPM is of type CPM1, we simply do a
devm_ioremap_resource() of the area provided via the device tree.

Signed-off-by: Christophe Leroy 

---
 v2: Use devm_ioremap_resource() instead of of_iomap()
 v3: Replaced of_iomap() by devm_ioremap_resource() in the patch text

 drivers/spi/spi-fsl-cpm.c | 35 ++-
 1 file changed, 18 insertions(+), 17 deletions(-)

diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c
index e85ab1c..4e5c945 100644
--- a/drivers/spi/spi-fsl-cpm.c
+++ b/drivers/spi/spi-fsl-cpm.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "spi-fsl-cpm.h"
 #include "spi-fsl-lib.h"
@@ -264,17 +265,6 @@ static unsigned long fsl_spi_cpm_get_pram(struct 
mpc8xxx_spi *mspi)
if (mspi->flags & SPI_CPM2) {
pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
out_be16(spi_base, pram_ofs);
-   } else {
-   struct spi_pram __iomem *pram = spi_base;
-   u16 rpbase = in_be16(&pram->rpbase);
-
-   /* Microcode relocation patch applied? */
-   if (rpbase) {
-   pram_ofs = rpbase;
-   } else {
-   pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64);
-   out_be16(spi_base, pram_ofs);
-   }
}
 
iounmap(spi_base);
@@ -287,7 +277,6 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
struct device_node *np = dev->of_node;
const u32 *iprop;
int size;
-   unsigned long pram_ofs;
unsigned long bds_ofs;
 
if (!(mspi->flags & SPI_CPM_MODE))
@@ -314,8 +303,21 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
}
}
 
-   pram_ofs = fsl_spi_cpm_get_pram(mspi);
-   if (IS_ERR_VALUE(pram_ofs)) {
+   if (mspi->flags & SPI_CPM1) {
+   struct resource *res;
+
+   res = platform_get_resource(to_platform_device(dev),
+   IORESOURCE_MEM, 1);
+   mspi->pram = devm_ioremap_resource(dev, res);
+   } else {
+   unsigned long pram_ofs = fsl_spi_cpm_get_pram(mspi);
+
+   if (IS_ERR_VALUE(pram_ofs))
+   mspi->pram = NULL;
+   else
+   mspi->pram = cpm_muram_addr(pram_ofs);
+   }
+   if (mspi->pram == NULL) {
dev_err(dev, "can't allocate spi parameter ram\n");
goto err_pram;
}
@@ -341,8 +343,6 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
goto err_dummy_rx;
}
 
-   mspi->pram = cpm_muram_addr(pram_ofs);
-
mspi->tx_bd = cpm_muram_addr(bds_ofs);
mspi->rx_bd = cpm_muram_addr(bds_ofs + sizeof(*mspi->tx_bd));
 
@@ -370,7 +370,8 @@ err_dummy_rx:
 err_dummy_tx:
cpm_muram_free(bds_ofs);
 err_bds:
-   cpm_muram_free(pram_ofs);
+   if (!(mspi->flags & SPI_CPM1))
+   cpm_muram_free(cpm_muram_offset(mspi->pram));
 err_pram:
fsl_spi_free_dummy_rx();
return -ENOMEM;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] spi: fsl-spi: fix devm_ioremap_resource() error case

2015-04-23 Thread Christophe Leroy
devm_ioremap_resource() doesn't return NULL but an ERR_PTR on error.

Reported-by: Jonas Gorsky 
Signed-off-by: Christophe Leroy 

---
 drivers/spi/spi-fsl-cpm.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c
index 4e5c945..1e66644 100644
--- a/drivers/spi/spi-fsl-cpm.c
+++ b/drivers/spi/spi-fsl-cpm.c
@@ -305,10 +305,15 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi)
 
if (mspi->flags & SPI_CPM1) {
struct resource *res;
+   void *pram;
 
res = platform_get_resource(to_platform_device(dev),
IORESOURCE_MEM, 1);
-   mspi->pram = devm_ioremap_resource(dev, res);
+   pram = devm_ioremap_resource(dev, res);
+   if (IS_ERR(pram))
+   mspi->pram = NULL;
+   else
+   mspi->pram = pram;
} else {
unsigned long pram_ofs = fsl_spi_cpm_get_pram(mspi);
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: powerpc32: rearrange instructions order in ip_fast_csum()

2015-04-28 Thread christophe leroy



Le 25/03/2015 02:22, Scott Wood a écrit :

On Tue, Feb 03, 2015 at 12:39:27PM +0100, LEROY Christophe wrote:

Signed-off-by: Christophe Leroy 
---
  arch/powerpc/lib/checksum_32.S | 10 +++---
  1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index 6d67e05..5500704 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -26,13 +26,17 @@
  _GLOBAL(ip_fast_csum)
lwz r0,0(r3)
lwzur5,4(r3)
-   addic.  r4,r4,-2
+   addic.  r4,r4,-4
addcr0,r0,r5
mtctr   r4
blelr-
-1: lwzur4,4(r3)
-   adder0,r0,r4
+   lwzur5,4(r3)
+   lwzur4,4(r3)

The blelr is pointless since len is guaranteed to be >= 5 (assuming that
comment is accurate), but now it's both pointless and in the wrong place,
since you haven't yet finished the four words that you subtracted from
r4.
The blelr is just there to protect the function against a negative value
of r4, hence of ctr.
In any case, the returned result in that case is not correct, as we do
not touch r3.


How about keeping the blelr, without the -, moving it after the initial
words, and changing the number of inital words to 5?

We can't just do blelr, we would need to fold the result first.
But indeed, this would be useless because I quickly checked and it seems
that all functions calling ip_fast_csum() check that the length is not
lower than 5.
So I will just remove the blelr

Also maybe do all
the loads up front, since many PPC chips have a three cycle load latency
rather than two.

ok

Christophe

---
L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel 
antivirus Avast.
http://www.avast.com

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH v2 1/2] powerpc32: put csum_tcpudp_magic inline

2015-04-28 Thread christophe leroy



Le 25/03/2015 03:10, Scott Wood a écrit :

On Tue, 2015-02-03 at 12:39 +0100, Christophe Leroy wrote:

csum_tcpudp_magic() is only a few instructions, and does not modify any other
register than the returned result. So it is not worth having it as a separate
function and suffer function branching and saving of volatile registers.
This patch makes it inline by use of the already existing csum_tcpudp_nofold()
function.

Signed-off-by: Christophe Leroy 

---
v2: no change

  arch/powerpc/include/asm/checksum.h | 15 +++
  arch/powerpc/lib/checksum_32.S  | 16 
  2 files changed, 15 insertions(+), 16 deletions(-)

The 64-bit version is pretty similar to the 32-bit -- why only use
csum_tcpudp_nofold() on 32-bit?


I did it only on 32-bit because I have no way to test it on 64-bits, but 
I can do it for 64 bits as well, no problem.


Christophe

---
L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel 
antivirus Avast.
http://www.avast.com

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [v2,2/2] powerpc32: add support for csum_add()

2015-04-28 Thread christophe leroy



Le 25/03/2015 02:30, Scott Wood a écrit :

On Tue, Feb 03, 2015 at 12:39:27PM +0100, LEROY Christophe wrote:

The C version of csum_add() as defined in include/net/checksum.h gives the
following assembly:
0:   7c 04 1a 14 add r0,r4,r3
4:   7c 64 00 10 subfc   r3,r4,r0
8:   7c 63 19 10 subfe   r3,r3,r3
c:   7c 63 00 50 subfr3,r3,r0

include/net/checksum.h also offers the possibility to define an arch specific
function.
This patch provides a ppc32 specific csum_add() inline function.

What makes it 32-bit specific?


As far as I understand, the 64-bit will do a 64 bit addition, so we will 
have to handle differently the carry, can't just be an addze like in 32-bit.


The generated code is most likely different on ppc64. I have no ppc64 
compiler so I can't check what gcc generates for the following code:


__wsum csum_add(__wsum csum, __wsum addend)
{
	u32 res = (__force u32)csum;
	res += (__force u32)addend;
	return (__force __wsum)(res + (res < (__force u32)addend));
}


Can someone with a ppc64 compiler tell what we get ?

Christophe


---
L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel 
antivirus Avast.
http://www.avast.com
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 0/4] powerpc32: use cacheable alternatives of memcpy and memset

2015-05-12 Thread Christophe Leroy
This patchset implements use of cacheable versions of memset and
memcpy when the len is greater than the cacheline size and the
destination is in RAM.

On MPC885, we observe a 7% rate increase on FTP transfer

Christophe Leroy (4):
  Partially revert "powerpc: Remove duplicate cacheable_memcpy/memzero
functions"
  powerpc32: swap r4 and r5 in cacheable_memzero
  powerpc32: memset(0): use cacheable_memzero
  powerpc32: memcpy: use cacheable_memcpy

 arch/powerpc/lib/copy_32.S | 148 +
 1 file changed, 148 insertions(+)

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/4] Partially revert "powerpc: Remove duplicate cacheable_memcpy/memzero functions"

2015-05-12 Thread Christophe Leroy
This partially reverts
commit 'powerpc: Remove duplicate cacheable_memcpy/memzero functions
("f909a35bdfb7cb350d078a2cf888162eeb20381c")'

Functions cacheable_memcpy/memzero are more efficient than
memcpy/memset as they use the dcbz instruction which avoids refill
of the cacheline with the data that we will overwrite.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/lib/copy_32.S | 127 +
 1 file changed, 127 insertions(+)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 6813f80..55f19f9 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -69,6 +69,54 @@ CACHELINE_BYTES = L1_CACHE_BYTES
 LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 CACHELINE_MASK = (L1_CACHE_BYTES-1)
 
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero.  This requires that the destination
+ * area is cacheable.  -- paulus
+ */
+_GLOBAL(cacheable_memzero)
+   mr  r5,r4
+   li  r4,0
+   addir6,r3,-4
+   cmplwi  0,r5,4
+   blt 7f
+   stwur4,4(r6)
+   beqlr
+   andi.   r0,r6,3
+   add r5,r0,r5
+   subfr6,r0,r6
+   clrlwi  r7,r6,32-LG_CACHELINE_BYTES
+   add r8,r7,r5
+   srwir9,r8,LG_CACHELINE_BYTES
+   addic.  r9,r9,-1/* total number of complete cachelines */
+   ble 2f
+   xorir0,r7,CACHELINE_MASK & ~3
+   srwi.   r0,r0,2
+   beq 3f
+   mtctr   r0
+4: stwur4,4(r6)
+   bdnz4b
+3: mtctr   r9
+   li  r7,4
+10:dcbzr7,r6
+   addir6,r6,CACHELINE_BYTES
+   bdnz10b
+   clrlwi  r5,r8,32-LG_CACHELINE_BYTES
+   addir5,r5,4
+2: srwir0,r5,2
+   mtctr   r0
+   bdz 6f
+1: stwur4,4(r6)
+   bdnz1b
+6: andi.   r5,r5,3
+7: cmpwi   0,r5,0
+   beqlr
+   mtctr   r5
+   addir6,r6,3
+8: stbur4,1(r6)
+   bdnz8b
+   blr
+
 _GLOBAL(memset)
rlwimi  r4,r4,8,16,23
rlwimi  r4,r4,16,0,15
@@ -94,6 +142,85 @@ _GLOBAL(memset)
bdnz8b
blr
 
+/*
+ * This version uses dcbz on the complete cache lines in the
+ * destination area to reduce memory traffic.  This requires that
+ * the destination area is cacheable.
+ * We only use this version if the source and dest don't overlap.
+ * -- paulus.
+ */
+_GLOBAL(cacheable_memcpy)
+   add r7,r3,r5/* test if the src & dst overlap */
+   add r8,r4,r5
+   cmplw   0,r4,r7
+   cmplw   1,r3,r8
+   crand   0,0,4   /* cr0.lt &= cr1.lt */
+   blt memcpy  /* if regions overlap */
+
+   addir4,r4,-4
+   addir6,r3,-4
+   neg r0,r3
+   andi.   r0,r0,CACHELINE_MASK/* # bytes to start of cache line */
+   beq 58f
+
+   cmplw   0,r5,r0 /* is this more than total to do? */
+   blt 63f /* if not much to do */
+   andi.   r8,r0,3 /* get it word-aligned first */
+   subfr5,r0,r5
+   mtctr   r8
+   beq+61f
+70:lbz r9,4(r4)/* do some bytes */
+   stb r9,4(r6)
+   addir4,r4,1
+   addir6,r6,1
+   bdnz70b
+61:srwi.   r0,r0,2
+   mtctr   r0
+   beq 58f
+72:lwzur9,4(r4)/* do some words */
+   stwur9,4(r6)
+   bdnz72b
+
+58:srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+   clrlwi  r5,r5,32-LG_CACHELINE_BYTES
+   li  r11,4
+   mtctr   r0
+   beq 63f
+53:
+   dcbzr11,r6
+   COPY_16_BYTES
+#if L1_CACHE_BYTES >= 32
+   COPY_16_BYTES
+#if L1_CACHE_BYTES >= 64
+   COPY_16_BYTES
+   COPY_16_BYTES
+#if L1_CACHE_BYTES >= 128
+   COPY_16_BYTES
+   COPY_16_BYTES
+   COPY_16_BYTES
+   COPY_16_BYTES
+#endif
+#endif
+#endif
+   bdnz53b
+
+63:srwi.   r0,r5,2
+   mtctr   r0
+   beq 64f
+30:lwzur0,4(r4)
+   stwur0,4(r6)
+   bdnz30b
+
+64:andi.   r0,r5,3
+   mtctr   r0
+   beq+65f
+40:lbz r0,4(r4)
+   stb r0,4(r6)
+   addir4,r4,1
+   addir6,r6,1
+   bdnz40b
+65:blr
+
 _GLOBAL(memmove)
cmplw   0,r3,r4
bgt backwards_memcpy
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/4] powerpc32: swap r4 and r5 in cacheable_memzero

2015-05-12 Thread Christophe Leroy
We swap r4 and r5, this avoids having to move the len contained in r4
into r5

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/lib/copy_32.S | 29 ++---
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 55f19f9..cbca76c 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -75,18 +75,17 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1)
  * area is cacheable.  -- paulus
  */
 _GLOBAL(cacheable_memzero)
-   mr  r5,r4
-   li  r4,0
+   li  r5,0
addir6,r3,-4
-   cmplwi  0,r5,4
+   cmplwi  0,r4,4
blt 7f
-   stwur4,4(r6)
+   stwur5,4(r6)
beqlr
andi.   r0,r6,3
-   add r5,r0,r5
+   add r4,r0,r4
subfr6,r0,r6
clrlwi  r7,r6,32-LG_CACHELINE_BYTES
-   add r8,r7,r5
+   add r8,r7,r4
srwir9,r8,LG_CACHELINE_BYTES
addic.  r9,r9,-1/* total number of complete cachelines */
ble 2f
@@ -94,26 +93,26 @@ _GLOBAL(cacheable_memzero)
srwi.   r0,r0,2
beq 3f
mtctr   r0
-4: stwur4,4(r6)
+4: stwur5,4(r6)
bdnz4b
 3: mtctr   r9
li  r7,4
 10:dcbzr7,r6
addir6,r6,CACHELINE_BYTES
bdnz10b
-   clrlwi  r5,r8,32-LG_CACHELINE_BYTES
-   addir5,r5,4
-2: srwir0,r5,2
+   clrlwi  r4,r8,32-LG_CACHELINE_BYTES
+   addir4,r4,4
+2: srwir0,r4,2
mtctr   r0
bdz 6f
-1: stwur4,4(r6)
+1: stwur5,4(r6)
bdnz1b
-6: andi.   r5,r5,3
-7: cmpwi   0,r5,0
+6: andi.   r4,r4,3
+7: cmpwi   0,r4,0
beqlr
-   mtctr   r5
+   mtctr   r4
addir6,r6,3
-8: stbur4,1(r6)
+8: stbur5,1(r6)
bdnz8b
blr
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/4] powerpc32: memset(0): use cacheable_memzero

2015-05-12 Thread Christophe Leroy
cacheable_memzero uses dcbz instruction and is more efficient than
memset(0) when the destination is in RAM

This patch renames memset as generic_memset, and defines memset
as a prolog to cacheable_memzero. This prolog checks if the byte
to set is 0 and if the buffer is in RAM. If not, it falls back to
generic_memset()

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/lib/copy_32.S | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index cbca76c..d8a9a86 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define COPY_16_BYTES  \
lwz r7,4(r4);   \
@@ -74,6 +75,18 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1)
  * to set them to zero.  This requires that the destination
  * area is cacheable.  -- paulus
  */
+_GLOBAL(memset)
+   cmplwi  r4,0
+   bne-generic_memset
+   cmplwi  r5,L1_CACHE_BYTES
+   blt-generic_memset
+   lis r8,max_pfn@ha
+   lwz r8,max_pfn@l(r8)
+   tophys  (r9,r3)
+   srwir9,r9,PAGE_SHIFT
+   cmplw   r9,r8
+   bge-generic_memset
+   mr  r4,r5
 _GLOBAL(cacheable_memzero)
li  r5,0
addir6,r3,-4
@@ -116,7 +129,7 @@ _GLOBAL(cacheable_memzero)
bdnz8b
blr
 
-_GLOBAL(memset)
+_GLOBAL(generic_memset)
rlwimi  r4,r4,8,16,23
rlwimi  r4,r4,16,0,15
addir6,r3,-4
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 4/4] powerpc32: memcpy: use cacheable_memcpy

2015-05-12 Thread Christophe Leroy
cacheable_memcpy uses dcbz instruction and is more efficient than
memcpy when the destination is in RAM

This patch renames memcpy as generic_memcpy, and defines memcpy as a
prolog to cacheable_memcpy. This prolog checks if the buffer is
in RAM. If not, it falls back to generic_memcpy()

On MPC885, we get an approximately 7% increase in the transfer rate
on FTP reception

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/lib/copy_32.S | 23 ---
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index d8a9a86..8f76d49 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -161,13 +161,27 @@ _GLOBAL(generic_memset)
  * We only use this version if the source and dest don't overlap.
  * -- paulus.
  */
+_GLOBAL(memmove)
+   cmplw   0,r3,r4
+   bgt backwards_memcpy
+   /* fall through */
+
+_GLOBAL(memcpy)
+   cmplwi  r5,L1_CACHE_BYTES
+   blt-generic_memcpy
+   lis r8,max_pfn@ha
+   lwz r8,max_pfn@l(r8)
+   tophys  (r9,r3)
+   srwir9,r9,PAGE_SHIFT
+   cmplw   r9,r8
+   bge-generic_memcpy
 _GLOBAL(cacheable_memcpy)
add r7,r3,r5/* test if the src & dst overlap */
add r8,r4,r5
cmplw   0,r4,r7
cmplw   1,r3,r8
crand   0,0,4   /* cr0.lt &= cr1.lt */
-   blt memcpy  /* if regions overlap */
+   blt generic_memcpy  /* if regions overlap */
 
addir4,r4,-4
addir6,r3,-4
@@ -233,12 +247,7 @@ _GLOBAL(cacheable_memcpy)
bdnz40b
 65:blr
 
-_GLOBAL(memmove)
-   cmplw   0,r3,r4
-   bgt backwards_memcpy
-   /* fall through */
-
-_GLOBAL(memcpy)
+_GLOBAL(generic_memcpy)
srwi.   r7,r5,3
addir6,r3,-4
addir4,r4,-4
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 3/4] powerpc32: memset(0): use cacheable_memzero

2015-05-14 Thread christophe leroy



Le 14/05/2015 02:55, Scott Wood a écrit :

On Tue, 2015-05-12 at 15:32 +0200, Christophe Leroy wrote:

cacheable_memzero uses dcbz instruction and is more efficient than
memset(0) when the destination is in RAM

This patch renames memset as generic_memset, and defines memset
as a prolog to cacheable_memzero. This prolog checks if the byte
to set is 0 and if the buffer is in RAM. If not, it falls back to
generic_memcpy()

Signed-off-by: Christophe Leroy 
---
  arch/powerpc/lib/copy_32.S | 15 ++-
  1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index cbca76c..d8a9a86 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -12,6 +12,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #define COPY_16_BYTES		\

lwz r7,4(r4);   \
@@ -74,6 +75,18 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1)
   * to set them to zero.  This requires that the destination
   * area is cacheable.  -- paulus
   */
+_GLOBAL(memset)
+   cmplwi  r4,0
+   bne-generic_memset
+   cmplwi  r5,L1_CACHE_BYTES
+   blt-generic_memset
+   lis r8,max_pfn@ha
+   lwz r8,max_pfn@l(r8)
+   tophys  (r9,r3)
+   srwir9,r9,PAGE_SHIFT
+   cmplw   r9,r8
+   bge-generic_memset
+   mr  r4,r5

max_pfn includes highmem, and tophys only works on normal kernel
addresses.
Is there any other simple way to determine whether an address is in RAM 
or not ?


I did that because of the below function from mm/mem.c

|int  page_is_ram(unsigned long  pfn)
{
#ifndef CONFIG_PPC64/* XXX for now */
return  pfn<  max_pfn;
#else
unsigned long  paddr= (pfn<<  PAGE_SHIFT);
struct  memblock_region*reg;

for_each_memblock(memory,  reg)
if  (paddr>=  reg->base&&  paddr< (reg->base+  reg->size))
return  1;
return  0;
#endif
}
|





If we were to point memset_io, memcpy_toio, etc. at noncacheable
versions, are there any other callers left that can reasonably point at
uncacheable memory?
Do you mean we could just consider that memcpy() and memset() are called 
only with destination on RAM and thus we could avoid the check ?
copy_tofrom_user() already makes this assumption (although a user app
could possibly provide a buffer located in an ALSA mapped IO area)


Christophe


---
L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel 
antivirus Avast.
http://www.avast.com

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 1/4] Partially revert "powerpc: Remove duplicate cacheable_memcpy/memzero functions"

2015-05-15 Thread christophe leroy


Le 14/05/2015 02:49, Scott Wood a écrit :

On Tue, 2015-05-12 at 15:32 +0200, Christophe Leroy wrote:

This partially reverts
commit 'powerpc: Remove duplicate cacheable_memcpy/memzero functions
("f909a35bdfb7cb350d078a2cf888162eeb20381c")'

I don't have that SHA.  Do you mean
b05ae4ee602b7dc90771408ccf0972e1b3801a35?

Right, took it from the wrong tree sorry.



Functions cacheable_memcpy/memzero are more efficient than
memcpy/memset as they use the dcbz instruction which avoids refill
of the cacheline with the data that we will overwrite.

I don't see anything in this patchset that addresses the "NOTE: The old
routines are just flat buggy on kernels that support hardware with
different cacheline sizes" comment.
I believe the NOTE means that if a kernel is compiled for several CPUs
having different cache line sizes, then it will not work. But this is also
the case for other functions using the dcbz instruction, like copy_page(),
clear_page() and copy_tofrom_user().


And indeed, this seems only possible in three cases:
1/ With CONFIG_44x as 47x has different size than 44x and 46x. However 
it is explicitly stated in arch/powerpc/platforms/44x/Kconfig : "config 
PPC_47x This option enables support for the 47x family of processors and 
is not currently compatible with other 44x or 46x varients"
2/ With CONFIG_PPC_85xx, as PPC_E500MC has different size than other 
E500. However it is explicitly stated in 
arch/powerpc/platforms/Kconfig.cputype : "config PPC_E500MC This must be 
enabled for running on e500mc (and derivatives such as e5500/e6500), and 
must be disabled for running on e500v1 or e500v2."
3/ With CONFIG_403GCX as 403GCX has different size than other 40x.
However there seems to be no way to select CONFIG_403GCX from
arch/powerpc/platforms/40x/Kconfig


Christophe

---
L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel 
antivirus Avast.
http://www.avast.com

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 0/6] powerpc32: replace memcpy and memset by cacheable alternatives

2015-05-19 Thread Christophe Leroy
This patchset implements use of cacheable versions of memset and
memcpy since when the destination is not cacheable, memset_io
and memcpy_toio are used.

On MPC885, we observe a 7% rate increase on FTP transfer

Christophe Leroy (6):
  powerpc: use memset_io() to clear CPM Muram
  Partially revert "powerpc: Remove duplicate cacheable_memcpy/memzero
functions"
  powerpc32: memset(0): use cacheable_memzero
  powerpc32: Merge the new memset() with the old one
  powerpc32: cacheable_memcpy becomes memcpy
  powerpc32: Few optimisations in memcpy

 arch/powerpc/lib/copy_32.S   | 109 ++-
 arch/powerpc/sysdev/cpm_common.c |   2 +-
 2 files changed, 109 insertions(+), 2 deletions(-)

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 1/6] powerpc: use memset_io() to clear CPM Muram

2015-05-19 Thread Christophe Leroy
CPM muram is not cached, so use memset_io() instead of memset()

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/sysdev/cpm_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 4f78695..e2ea519 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -147,7 +147,7 @@ unsigned long cpm_muram_alloc(unsigned long size, unsigned 
long align)
spin_lock_irqsave(&cpm_muram_lock, flags);
cpm_muram_info.alignment = align;
start = rh_alloc(&cpm_muram_info, size, "commproc");
-   memset(cpm_muram_addr(start), 0, size);
+   memset_io(cpm_muram_addr(start), 0, size);
spin_unlock_irqrestore(&cpm_muram_lock, flags);
 
return start;
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 2/6] Partially revert "powerpc: Remove duplicate cacheable_memcpy/memzero functions"

2015-05-19 Thread Christophe Leroy
This partially reverts
commit 'powerpc: Remove duplicate cacheable_memcpy/memzero functions
("b05ae4ee602b7dc90771408ccf0972e1b3801a35")'

Functions cacheable_memcpy/memzero are more efficient than
memcpy/memset as they use the dcbz instruction which avoids refill
of the cacheline with the data that we will overwrite.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/lib/copy_32.S | 127 +
 1 file changed, 127 insertions(+)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 6813f80..55f19f9 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -69,6 +69,54 @@ CACHELINE_BYTES = L1_CACHE_BYTES
 LG_CACHELINE_BYTES = L1_CACHE_SHIFT
 CACHELINE_MASK = (L1_CACHE_BYTES-1)
 
+/*
+ * Use dcbz on the complete cache lines in the destination
+ * to set them to zero.  This requires that the destination
+ * area is cacheable.  -- paulus
+ */
+_GLOBAL(cacheable_memzero)
+   mr  r5,r4
+   li  r4,0
+   addir6,r3,-4
+   cmplwi  0,r5,4
+   blt 7f
+   stwur4,4(r6)
+   beqlr
+   andi.   r0,r6,3
+   add r5,r0,r5
+   subfr6,r0,r6
+   clrlwi  r7,r6,32-LG_CACHELINE_BYTES
+   add r8,r7,r5
+   srwir9,r8,LG_CACHELINE_BYTES
+   addic.  r9,r9,-1/* total number of complete cachelines */
+   ble 2f
+   xorir0,r7,CACHELINE_MASK & ~3
+   srwi.   r0,r0,2
+   beq 3f
+   mtctr   r0
+4: stwur4,4(r6)
+   bdnz4b
+3: mtctr   r9
+   li  r7,4
+10:dcbzr7,r6
+   addir6,r6,CACHELINE_BYTES
+   bdnz10b
+   clrlwi  r5,r8,32-LG_CACHELINE_BYTES
+   addir5,r5,4
+2: srwir0,r5,2
+   mtctr   r0
+   bdz 6f
+1: stwur4,4(r6)
+   bdnz1b
+6: andi.   r5,r5,3
+7: cmpwi   0,r5,0
+   beqlr
+   mtctr   r5
+   addir6,r6,3
+8: stbur4,1(r6)
+   bdnz8b
+   blr
+
 _GLOBAL(memset)
rlwimi  r4,r4,8,16,23
rlwimi  r4,r4,16,0,15
@@ -94,6 +142,85 @@ _GLOBAL(memset)
bdnz8b
blr
 
+/*
+ * This version uses dcbz on the complete cache lines in the
+ * destination area to reduce memory traffic.  This requires that
+ * the destination area is cacheable.
+ * We only use this version if the source and dest don't overlap.
+ * -- paulus.
+ */
+_GLOBAL(cacheable_memcpy)
+   add r7,r3,r5/* test if the src & dst overlap */
+   add r8,r4,r5
+   cmplw   0,r4,r7
+   cmplw   1,r3,r8
+   crand   0,0,4   /* cr0.lt &= cr1.lt */
+   blt memcpy  /* if regions overlap */
+
+   addir4,r4,-4
+   addir6,r3,-4
+   neg r0,r3
+   andi.   r0,r0,CACHELINE_MASK/* # bytes to start of cache line */
+   beq 58f
+
+   cmplw   0,r5,r0 /* is this more than total to do? */
+   blt 63f /* if not much to do */
+   andi.   r8,r0,3 /* get it word-aligned first */
+   subfr5,r0,r5
+   mtctr   r8
+   beq+61f
+70:lbz r9,4(r4)/* do some bytes */
+   stb r9,4(r6)
+   addir4,r4,1
+   addir6,r6,1
+   bdnz70b
+61:srwi.   r0,r0,2
+   mtctr   r0
+   beq 58f
+72:lwzur9,4(r4)/* do some words */
+   stwur9,4(r6)
+   bdnz72b
+
+58:srwi.   r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+   clrlwi  r5,r5,32-LG_CACHELINE_BYTES
+   li  r11,4
+   mtctr   r0
+   beq 63f
+53:
+   dcbzr11,r6
+   COPY_16_BYTES
+#if L1_CACHE_BYTES >= 32
+   COPY_16_BYTES
+#if L1_CACHE_BYTES >= 64
+   COPY_16_BYTES
+   COPY_16_BYTES
+#if L1_CACHE_BYTES >= 128
+   COPY_16_BYTES
+   COPY_16_BYTES
+   COPY_16_BYTES
+   COPY_16_BYTES
+#endif
+#endif
+#endif
+   bdnz53b
+
+63:srwi.   r0,r5,2
+   mtctr   r0
+   beq 64f
+30:lwzur0,4(r4)
+   stwur0,4(r6)
+   bdnz30b
+
+64:andi.   r0,r5,3
+   mtctr   r0
+   beq+65f
+40:lbz r0,4(r4)
+   stb r0,4(r6)
+   addir4,r4,1
+   addir6,r6,1
+   bdnz40b
+65:blr
+
 _GLOBAL(memmove)
cmplw   0,r3,r4
bgt backwards_memcpy
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 3/6] powerpc32: memset(0): use cacheable_memzero

2015-05-19 Thread Christophe Leroy
cacheable_memzero uses dcbz instruction and is more efficient than
memset(0) when the destination is in RAM

This patch renames memset as generic_memset, and defines memset
as a prolog to cacheable_memzero. This prolog checks if the byte
to set is 0. If not, it falls back to generic_memset()

cacheable_memzero disappears as it is not referenced anywhere anymore

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/lib/copy_32.S | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 55f19f9..0b4f954 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -74,9 +74,9 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1)
  * to set them to zero.  This requires that the destination
  * area is cacheable.  -- paulus
  */
-_GLOBAL(cacheable_memzero)
-   mr  r5,r4
-   li  r4,0
+_GLOBAL(memset)
+   cmplwi  r4,0
+   bne-generic_memset
addir6,r3,-4
cmplwi  0,r5,4
blt 7f
@@ -117,7 +117,7 @@ _GLOBAL(cacheable_memzero)
bdnz8b
blr
 
-_GLOBAL(memset)
+_GLOBAL(generic_memset)
rlwimi  r4,r4,8,16,23
rlwimi  r4,r4,16,0,15
addir6,r3,-4
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 4/6] powerpc32: Merge the new memset() with the old one

2015-05-19 Thread Christophe Leroy
cacheable_memzero() which has become the new memset() and the old
memset() are quite similar, so just merge them.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/lib/copy_32.S | 34 +++---
 1 file changed, 7 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 0b4f954..9262071 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -75,8 +75,9 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1)
  * area is cacheable.  -- paulus
  */
 _GLOBAL(memset)
-   cmplwi  r4,0
-   bne-generic_memset
+   rlwimi  r4,r4,8,16,23
+   rlwimi  r4,r4,16,0,15
+
addir6,r3,-4
cmplwi  0,r5,4
blt 7f
@@ -85,6 +86,9 @@ _GLOBAL(memset)
andi.   r0,r6,3
add r5,r0,r5
subfr6,r0,r6
+   cmplwi  0,r4,0
+   bne 2f  /* Use normal procedure if r4 is not zero */
+
clrlwi  r7,r6,32-LG_CACHELINE_BYTES
add r8,r7,r5
srwir9,r8,LG_CACHELINE_BYTES
@@ -103,32 +107,8 @@ _GLOBAL(memset)
bdnz10b
clrlwi  r5,r8,32-LG_CACHELINE_BYTES
addir5,r5,4
-2: srwir0,r5,2
-   mtctr   r0
-   bdz 6f
-1: stwur4,4(r6)
-   bdnz1b
-6: andi.   r5,r5,3
-7: cmpwi   0,r5,0
-   beqlr
-   mtctr   r5
-   addir6,r6,3
-8: stbur4,1(r6)
-   bdnz8b
-   blr
 
-_GLOBAL(generic_memset)
-   rlwimi  r4,r4,8,16,23
-   rlwimi  r4,r4,16,0,15
-   addir6,r3,-4
-   cmplwi  0,r5,4
-   blt 7f
-   stwur4,4(r6)
-   beqlr
-   andi.   r0,r6,3
-   add r5,r0,r5
-   subfr6,r0,r6
-   srwir0,r5,2
+2: srwir0,r5,2
mtctr   r0
bdz 6f
 1: stwur4,4(r6)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 5/6] powerpc32: cacheable_memcpy becomes memcpy

2015-05-19 Thread Christophe Leroy
cacheable_memcpy uses dcbz instruction and is more efficient than
memcpy when the destination is in RAM. If the destination is in an
io area, memcpy_toio() is normally used, not memcpy

This patch renames memcpy as generic_memcpy, and renames
cacheable_memcpy as memcpy

On MPC885, we get an approximately 7% increase in the transfer rate
on FTP reception

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/lib/copy_32.S | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 9262071..1d49c74 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -129,13 +129,18 @@ _GLOBAL(memset)
  * We only use this version if the source and dest don't overlap.
  * -- paulus.
  */
-_GLOBAL(cacheable_memcpy)
+_GLOBAL(memmove)
+   cmplw   0,r3,r4
+   bgt backwards_memcpy
+   /* fall through */
+
+_GLOBAL(memcpy)
add r7,r3,r5/* test if the src & dst overlap */
add r8,r4,r5
cmplw   0,r4,r7
cmplw   1,r3,r8
crand   0,0,4   /* cr0.lt &= cr1.lt */
-   blt memcpy  /* if regions overlap */
+   blt generic_memcpy  /* if regions overlap */
 
addir4,r4,-4
addir6,r3,-4
@@ -201,12 +206,7 @@ _GLOBAL(cacheable_memcpy)
bdnz40b
 65:blr
 
-_GLOBAL(memmove)
-   cmplw   0,r3,r4
-   bgt backwards_memcpy
-   /* fall through */
-
-_GLOBAL(memcpy)
+_GLOBAL(generic_memcpy)
srwi.   r7,r5,3
addir6,r3,-4
addir4,r4,-4
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 6/6] powerpc32: Few optimisations in memcpy

2015-05-19 Thread Christophe Leroy
This patch adds a few optimisations in memcpy functions by using
lbzu/stbu instead of lbz/stb and by re-ordering instructions inside a loop
to reduce latency due to loading

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/lib/copy_32.S | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 1d49c74..2ef50c6 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -155,9 +155,9 @@ _GLOBAL(memcpy)
mtctr   r8
beq+61f
 70:lbz r9,4(r4)/* do some bytes */
-   stb r9,4(r6)
addir4,r4,1
addir6,r6,1
+   stb r9,3(r6)
bdnz70b
 61:srwi.   r0,r0,2
mtctr   r0
@@ -199,10 +199,10 @@ _GLOBAL(memcpy)
 64:andi.   r0,r5,3
mtctr   r0
beq+65f
-40:lbz r0,4(r4)
-   stb r0,4(r6)
-   addir4,r4,1
-   addir6,r6,1
+   addir4,r4,3
+   addir6,r6,3
+40:lbzur0,1(r4)
+   stbur0,1(r6)
bdnz40b
 65:blr
 
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 0/2] Optimise some IP checksum functions.

2015-05-19 Thread Christophe Leroy
This patchset provides a few optimisations related to IP checksum functions.

Christophe Leroy (2):
  powerpc: put csum_tcpudp_magic inline
  powerpc: add support for csum_add()

 arch/powerpc/include/asm/checksum.h | 37 -
 arch/powerpc/lib/checksum_32.S  | 16 
 arch/powerpc/lib/checksum_64.S  | 21 -
 3 files changed, 28 insertions(+), 46 deletions(-)

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 1/2] powerpc: put csum_tcpudp_magic inline

2015-05-19 Thread Christophe Leroy
csum_tcpudp_magic() is only a few instructions, and modifies
very few registers. So it is not worth having it as a separate
function and suffering the cost of a function call and of saving
volatile registers.

This patch makes it inline by use of the already existing
csum_tcpudp_nofold() function.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/checksum.h | 21 -
 arch/powerpc/lib/checksum_32.S  | 16 
 arch/powerpc/lib/checksum_64.S  | 21 -
 3 files changed, 12 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/include/asm/checksum.h 
b/arch/powerpc/include/asm/checksum.h
index 8251a3b..5e43d2d 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -20,15 +20,6 @@
 extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
 
 /*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-extern __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
-   unsigned short len,
-   unsigned short proto,
-   __wsum sum);
-
-/*
  * computes the checksum of a memory block at buff, length len,
  * and adds in "sum" (32-bit)
  *
@@ -127,6 +118,18 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, 
__be32 daddr,
 #endif
 }
 
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+   unsigned short len,
+   unsigned short proto,
+   __wsum sum)
+{
+   return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
 #endif
 #endif /* __KERNEL__ */
 #endif
diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S
index e23a436..d6fab08 100644
--- a/arch/powerpc/lib/checksum_32.S
+++ b/arch/powerpc/lib/checksum_32.S
@@ -41,22 +41,6 @@ _GLOBAL(ip_fast_csum)
blr
 
 /*
- * Compute checksum of TCP or UDP pseudo-header:
- *   csum_tcpudp_magic(saddr, daddr, len, proto, sum)
- */
-_GLOBAL(csum_tcpudp_magic)
-   rlwimi  r5,r6,16,0,15   /* put proto in upper half of len */
-   addcr0,r3,r4/* add 4 32-bit words together */
-   adder0,r0,r5
-   adder0,r0,r7
-   addze   r0,r0   /* add in final carry */
-   rlwinm  r3,r0,16,0,31   /* fold two halves together */
-   add r3,r0,r3
-   not r3,r3
-   srwir3,r3,16
-   blr
-
-/*
  * computes the checksum of a memory block at buff, length len,
  * and adds in "sum" (32-bit)
  *
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index 57a0720..f3ef354 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -45,27 +45,6 @@ _GLOBAL(ip_fast_csum)
blr
 
 /*
- * Compute checksum of TCP or UDP pseudo-header:
- *   csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum)
- * No real gain trying to do this specially for 64 bit, but
- * the 32 bit addition may spill into the upper bits of
- * the doubleword so we still must fold it down from 64.
- */
-_GLOBAL(csum_tcpudp_magic)
-   rlwimi  r5,r6,16,0,15   /* put proto in upper half of len */
-   addcr0,r3,r4/* add 4 32-bit words together */
-   adder0,r0,r5
-   adder0,r0,r7
-rldicl  r4,r0,32,0  /* fold 64 bit value */
-add r0,r4,r0
-srdir0,r0,32
-   rlwinm  r3,r0,16,0,31   /* fold two halves together */
-   add r3,r0,r3
-   not r3,r3
-   srwir3,r3,16
-   blr
-
-/*
  * Computes the checksum of a memory block at buff, length len,
  * and adds in "sum" (32-bit).
  *
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 2/2] powerpc: add support for csum_add()

2015-05-19 Thread Christophe Leroy
The C version of csum_add() as defined in include/net/checksum.h gives the
following assembly in ppc32:
   0:   7c 04 1a 14 add r0,r4,r3
   4:   7c 64 00 10 subfc   r3,r4,r0
   8:   7c 63 19 10 subfe   r3,r3,r3
   c:   7c 63 00 50 subfr3,r3,r0
and the following in ppc64:
   0xc0001af8 <+0>: add r3,r3,r4
   0xc0001afc <+4>: cmplw   cr7,r3,r4
   0xc0001b00 <+8>: mfcrr4
   0xc0001b04 <+12>:rlwinm  r4,r4,29,31,31
   0xc0001b08 <+16>:add r3,r4,r3
   0xc0001b0c <+20>:clrldi  r3,r3,32
   0xc0001b10 <+24>:blr

include/net/checksum.h also offers the possibility to define an arch specific
function.
This patch provides a specific csum_add() inline function.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/checksum.h | 16 
 1 file changed, 16 insertions(+)

diff --git a/arch/powerpc/include/asm/checksum.h 
b/arch/powerpc/include/asm/checksum.h
index 5e43d2d..e8d9ef4 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -130,6 +130,22 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, 
__be32 daddr,
return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
 }
 
+#define HAVE_ARCH_CSUM_ADD
+static inline __wsum csum_add(__wsum csum, __wsum addend)
+{
+#ifdef __powerpc64__
+   u64 res = (__force u64)csum;
+
+   res += (__force u64)addend;
+   return (__force __wsum)((u32)res + (res >> 32));
+#else
+   asm("addc %0,%0,%1;"
+   "addze %0,%0;"
+   : "+r" (csum) : "r" (addend));
+   return csum;
+#endif
+}
+
 #endif
 #endif /* __KERNEL__ */
 #endif
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 00/19] powerpc/8xx: Optimise MMU TLB handling and add support of 16k pages

2014-08-29 Thread Christophe Leroy
This patchset:
1) provides several MMU TLB handling optimisation on MPC8xx.
2) adds support of 16k pages on MPC8xx.
All changes have been successfully tested on a custom board equipped with MPC885

The two differences with first version of the patch are:
1) I removed the patch number 10, which was implementing a 16 bit alignment of 
the
PGDIR. It is not worth potentially wasting up to 64k of memory just for 
removing one
instruction (ori).
2) I managed to preserve r11 while calculating the level 2 address, therefore
no more need to save r11 into CR.

Signed-off-by: Christophe Leroy 
Tested-by: Christophe Leroy 

 arch/powerpc/Kconfig |2 +-
 arch/powerpc/include/asm/mmu-8xx.h   |2 +
 arch/powerpc/include/asm/pgtable-ppc32.h |   21 ++
 arch/powerpc/include/asm/pte-8xx.h   |7 +-
 arch/powerpc/include/asm/reg.h   |3 +-
 arch/powerpc/kernel/head_8xx.S   |  342 +++-
 6 files changed, 187 insertions(+), 190 deletions(-)
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 01/19] powerpc/8xx: Declare SPRG2 as a SCRATCH register

2014-08-29 Thread Christophe Leroy
Since commit 469d62be9263b92f2c3329540cbb1c076111f4f3, SPRG2 is used as a
scratch register just like SPRG0 and SPRG1. So declare it as such and fix
the comment which is not valid anymore since that commit.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/include/asm/reg.h |3 ++-
 arch/powerpc/kernel/head_8xx.S |   10 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index cb9c174..b6a7d62 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -888,7 +888,7 @@
  * 32-bit 8xx:
  * - SPRG0 scratch for exception vectors
  * - SPRG1 scratch for exception vectors
- * - SPRG2 apparently unused but initialized
+ * - SPRG2 scratch for exception vectors
  *
  */
 #ifdef CONFIG_PPC64
@@ -994,6 +994,7 @@
 #ifdef CONFIG_8xx
 #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0
 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1
+#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2
 #endif
 
 
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 55d12fb..1329c5a 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -301,7 +301,7 @@ InstructionTLBMiss:
stw r11, 4(r0)
 #else
mtspr   SPRN_DAR, r10
-   mtspr   SPRN_SPRG2, r11
+   mtspr   SPRN_SPRG_SCRATCH2, r11
 #endif
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
 #ifdef CONFIG_8xx_CPU15
@@ -363,7 +363,7 @@ InstructionTLBMiss:
mfspr   r10, SPRN_DAR
mtcrr10
mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r11, SPRN_SPRG2
+   mfspr   r11, SPRN_SPRG_SCRATCH2
 #else
lwz r11, 0(r0)
mtcrr11
@@ -386,7 +386,7 @@ InstructionTLBMiss:
mtcrr10
li  r11, 0x00f0
mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r11, SPRN_SPRG2
+   mfspr   r11, SPRN_SPRG_SCRATCH2
 #else
lwz r11, 0(r0)
mtcrr11
@@ -409,7 +409,7 @@ DataStoreTLBMiss:
stw r11, 4(r0)
 #else
mtspr   SPRN_DAR, r10
-   mtspr   SPRN_SPRG2, r11
+   mtspr   SPRN_SPRG_SCRATCH2, r11
 #endif
mfspr   r10, SPRN_M_TWB /* Get level 1 table entry address */
 
@@ -487,7 +487,7 @@ DataStoreTLBMiss:
mfspr   r10, SPRN_DAR
mtcrr10
mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r11, SPRN_SPRG2
+   mfspr   r11, SPRN_SPRG_SCRATCH2
 #else
mtspr   SPRN_DAR, r11   /* Tag DAR */
lwz r11, 0(r0)
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 02/19] powerpc/8xx: Use SCRATCH0 and SCRATCH1 also for TLB handlers

2014-08-29 Thread Christophe Leroy
SCRATCH0 and SCRATCH1 are only used in exception prologs where no other
exception can happen. There is therefore no need to preserve them across
TLB handlers; we can use them there as in other exceptions. One of the
advantages is that they do not suffer from the CPU6 erratum, unlike the
M_TW register.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |  104 --
 1 files changed, 36 insertions(+), 68 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 1329c5a..3af6db1 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -104,12 +104,15 @@ turn_on_mmu:
  * task's thread_struct.
  */
 #define EXCEPTION_PROLOG   \
-   mtspr   SPRN_SPRG_SCRATCH0,r10; \
-   mtspr   SPRN_SPRG_SCRATCH1,r11; \
-   mfcrr10;\
+   EXCEPTION_PROLOG_0; \
EXCEPTION_PROLOG_1; \
EXCEPTION_PROLOG_2
 
+#define EXCEPTION_PROLOG_0 \
+   mtspr   SPRN_SPRG_SCRATCH0,r10; \
+   mtspr   SPRN_SPRG_SCRATCH1,r11; \
+   mfcrr10
+
 #define EXCEPTION_PROLOG_1 \
mfspr   r11,SPRN_SRR1;  /* check whether user or kernel */ \
andi.   r11,r11,MSR_PR; \
@@ -145,6 +148,14 @@ turn_on_mmu:
SAVE_2GPRS(7, r11)
 
 /*
+ * Exception exit code.
+ */
+#define EXCEPTION_EPILOG_0 \
+   mtcrr10;\
+   mfspr   r10,SPRN_SPRG_SCRATCH0; \
+   mfspr   r11,SPRN_SPRG_SCRATCH1
+
+/*
  * Note: code which follows this uses cr0.eq (set if from kernel),
  * r11, r12 (SRR0), and r9 (SRR1).
  *
@@ -293,16 +304,8 @@ InstructionTLBMiss:
 #ifdef CONFIG_8xx_CPU6
stw r3, 8(r0)
 #endif
-   DO_8xx_CPU6(0x3f80, r3)
-   mtspr   SPRN_M_TW, r10  /* Save a couple of working registers */
-   mfcrr10
-#ifdef CONFIG_8xx_CPU6
-   stw r10, 0(r0)
-   stw r11, 4(r0)
-#else
-   mtspr   SPRN_DAR, r10
-   mtspr   SPRN_SPRG_SCRATCH2, r11
-#endif
+   EXCEPTION_PROLOG_0
+   mtspr   SPRN_SPRG_SCRATCH2, r10
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
 #ifdef CONFIG_8xx_CPU15
addir11, r10, 0x1000
@@ -359,18 +362,11 @@ InstructionTLBMiss:
mtspr   SPRN_MI_RPN, r10/* Update TLB entry */
 
/* Restore registers */
-#ifndef CONFIG_8xx_CPU6
-   mfspr   r10, SPRN_DAR
-   mtcrr10
-   mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r11, SPRN_SPRG_SCRATCH2
-#else
-   lwz r11, 0(r0)
-   mtcrr11
-   lwz r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
 #endif
-   mfspr   r10, SPRN_M_TW
+   mfspr   r10, SPRN_SPRG_SCRATCH2
+   EXCEPTION_EPILOG_0
rfi
 2:
mfspr   r11, SPRN_SRR1
@@ -381,19 +377,11 @@ InstructionTLBMiss:
mtspr   SPRN_SRR1, r11
 
/* Restore registers */
-#ifndef CONFIG_8xx_CPU6
-   mfspr   r10, SPRN_DAR
-   mtcrr10
-   li  r11, 0x00f0
-   mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r11, SPRN_SPRG_SCRATCH2
-#else
-   lwz r11, 0(r0)
-   mtcrr11
-   lwz r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
 #endif
-   mfspr   r10, SPRN_M_TW
+   mfspr   r10, SPRN_SPRG_SCRATCH2
+   EXCEPTION_EPILOG_0
b   InstructionAccess
 
. = 0x1200
@@ -401,16 +389,8 @@ DataStoreTLBMiss:
 #ifdef CONFIG_8xx_CPU6
stw r3, 8(r0)
 #endif
-   DO_8xx_CPU6(0x3f80, r3)
-   mtspr   SPRN_M_TW, r10  /* Save a couple of working registers */
-   mfcrr10
-#ifdef CONFIG_8xx_CPU6
-   stw r10, 0(r0)
-   stw r11, 4(r0)
-#else
-   mtspr   SPRN_DAR, r10
-   mtspr   SPRN_SPRG_SCRATCH2, r11
-#endif
+   EXCEPTION_PROLOG_0
+   mtspr   SPRN_SPRG_SCRATCH2, r10
mfspr   r10, SPRN_M_TWB /* Get level 1 table entry address */
 
/* If we are faulting a kernel address, we have to use the
@@ -483,19 +463,12 @@ DataStoreTLBMiss:
mtspr   SPRN_MD_RPN, r10/* Update TLB entry */
 
/* Restore registers */
-#ifndef CONFIG_8xx_CPU6
-   mfspr   r10, SPRN_DAR
-   mtcrr10
-   mtspr   SPRN_DAR, r11   /* Tag DAR */
-   mfspr   r11, SPRN_SPRG_SCRATCH2
-#else
-   mtspr   SPRN_DAR, r11   /* Tag DAR */
-   lwz r11, 0(r0)
-   mtcrr11
-   lwz r11, 4(r0)
+#ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
 #endif
-   mfspr   r10, SPRN_M_TW
+   mtspr   SPRN_DAR, r11   /* Tag DAR */
+   mfspr   r10, SPRN_SPRG_SCRATCH2
+   EXCEPTION_EPILOG_0
rfi
 
 /* This is an instruction TLB error on the MPC8xx.  This could be due
@@ -519,23 +492,18 @@ DataTLBError:
 #ifdef CONFIG_8xx_CPU6
stw r3, 8(r0)
 #endif
-   DO_8xx_CPU6(0x3f80, r3)
-   mtspr   SPRN_M_TW, r10  /* Save a couple of working registers */
-   mfcrr10
-   stw r10, 0(r0)
-   stw r11, 4(r0)
+   EXCEPTION_PROLOG_0
+   mtspr   SPRN_SPRG_SCRATCH2

[PATCH v2 03/19] powerpc/8xx: exception InstructionAccess does not exist on MPC8xx

2014-08-29 Thread Christophe Leroy
Exception InstructionAccess does not exist on MPC8xx. No need to branch there 
from somewhere else. 
Handling can be done directly in InstructionTLBError Exception.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |   17 +++--
 1 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 3af6db1..fbe5d10 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -234,15 +234,9 @@ DataAccess:
EXC_XFER_LITE(0x300, handle_page_fault)
 
 /* Instruction access exception.
- * This is "never generated" by the MPC8xx.  We jump to it for other
- * translation errors.
+ * This is "never generated" by the MPC8xx.
  */
-   . = 0x400
-InstructionAccess:
-   EXCEPTION_PROLOG
-   mr  r4,r12
-   mr  r5,r9
-   EXC_XFER_LITE(0x400, handle_page_fault)
+   EXCEPTION(0x400, InstructionAccess, unknown_exception, EXC_XFER_STD)
 
 /* External interrupt */
EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE)
@@ -382,7 +376,7 @@ InstructionTLBMiss:
 #endif
mfspr   r10, SPRN_SPRG_SCRATCH2
EXCEPTION_EPILOG_0
-   b   InstructionAccess
+   b   InstructionTLBError
 
. = 0x1200
 DataStoreTLBMiss:
@@ -477,7 +471,10 @@ DataStoreTLBMiss:
  */
. = 0x1300
 InstructionTLBError:
-   b   InstructionAccess
+   EXCEPTION_PROLOG
+   mr  r4,r12
+   mr  r5,r9
+   EXC_XFER_LITE(0x1300, handle_page_fault)
 
 /* This is the data TLB error on the MPC8xx.  This could be due to
  * many reasons, including a dirty update to a pte.  We can catch that
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 05/19] powerpc/8xx: Fix comment about DIRTY update

2014-08-29 Thread Christophe Leroy
Since commit 2321f33790a6c5b80322d907a92d5739e7521a13, dirty handling is not
handled here anymore. So we fix the comment.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |8 ++--
 1 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index e59e39e..171c6ef 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -477,12 +477,8 @@ InstructionTLBError:
EXC_XFER_LITE(0x1300, handle_page_fault)
 
 /* This is the data TLB error on the MPC8xx.  This could be due to
- * many reasons, including a dirty update to a pte.  We can catch that
- * one here, but anything else is an error.  First, we track down the
- * Linux pte.  If it is valid, write access is allowed, but the
- * page dirty bit is not set, we will set it and reload the TLB.  For
- * any other case, we bail out to a higher level function that can
- * handle it.
+ * many reasons, including a dirty update to a pte.  We bail out to
+ * a higher level function that can handle it.
  */
. = 0x1400
 DataTLBError:
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 04/19] powerpc/8xx: Remove loading of r10 at end of FixupDAR

2014-08-29 Thread Christophe Leroy
Since commit 2321f33790a6c5b80322d907a92d5739e7521a13, r10 is not used anymore
after FixupDAR. There is therefore no need to set it up with the value of DAR.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |7 +++
 1 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index fbe5d10..e59e39e 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -495,7 +495,7 @@ DataTLBError:
mfspr   r10, SPRN_DAR
cmpwi   cr0, r10, 0x00f0
beq-FixupDAR/* must be a buggy dcbX, icbi insn. */
-DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of 
DAR */
+DARFixed:/* Return from dcbx instruction bug workaround */
 #ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
 #endif
@@ -524,7 +524,7 @@ DARFixed:/* Return from dcbx instruction bug workaround, 
r10 holds value of DAR
 
 /* This is the procedure to calculate the data EA for buggy dcbx,dcbi 
instructions
  * by decoding the registers used by the dcbx instruction and adding them.
- * DAR is set to the calculated address and r10 also holds the EA on exit.
+ * DAR is set to the calculated address.
  */
  /* define if you don't want to use self modifying code */
 #define NO_SELF_MODIFYING_CODE
@@ -564,8 +564,7 @@ FixupDAR:/* Entry point for dcbx workaround. */
beq+142f
cmpwi   cr0, r10, 1964  /* Is icbi? */
beq+142f
-141:   mfspr   r10, SPRN_DAR   /* r10 must hold DAR at exit */
-   b   DARFixed/* Nope, go back to normal TLB processing */
+141:   b   DARFixed/* Nope, go back to normal TLB processing */
 
 144:   mfspr   r10, SPRN_DSISR
rlwinm  r10, r10,0,7,5  /* Clear store bit for buggy dcbst insn */
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 10/19] powerpc/8xx: Duplicate two insns instead of branching

2014-08-29 Thread Christophe Leroy
Branching takes two cycles on MPC8xx. Let's duplicate the two instructions
and avoid the branching.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |6 --
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 5037420..4a49ff3 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -638,9 +638,11 @@ modified_instr:
 
/* special handling for r10,r11 since these are modified already */
 153:   mfspr   r11, SPRN_SPRG_SCRATCH1 /* load r11 from SPRN_SPRG_SCRATCH1 */
-   b   155f
+   add r10, r10, r11   /* add it */
+   mfctr   r11 /* restore r11 */
+   b   151b
 154:   mfspr   r11, SPRN_SPRG_SCRATCH0 /* load r10 from SPRN_SPRG_SCRATCH0 */
-155:   add r10, r10, r11   /* add it */
+   add r10, r10, r11   /* add it */
mfctr   r11 /* restore r11 */
b   151b
 #endif
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 06/19] powerpc/8xx: No need to save r10 and r3 when not calling FixupDAR

2014-08-29 Thread Christophe Leroy
r10 and r3 are only used inside the FixupDAR function. So let's save them inside
that function only.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |   27 +--
 1 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 171c6ef..845abf8 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -482,20 +482,12 @@ InstructionTLBError:
  */
. = 0x1400
 DataTLBError:
-#ifdef CONFIG_8xx_CPU6
-   stw r3, 8(r0)
-#endif
EXCEPTION_PROLOG_0
-   mtspr   SPRN_SPRG_SCRATCH2, r10
 
-   mfspr   r10, SPRN_DAR
-   cmpwi   cr0, r10, 0x00f0
+   mfspr   r11, SPRN_DAR
+   cmpwi   cr0, r11, 0x00f0
beq-FixupDAR/* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
-#ifdef CONFIG_8xx_CPU6
-   lwz r3, 8(r0)
-#endif
-   mfspr   r10,SPRN_SPRG_SCRATCH2
EXCEPTION_EPILOG_0
b   DataAccess
 
@@ -525,6 +517,10 @@ DARFixed:/* Return from dcbx instruction bug workaround */
  /* define if you don't want to use self modifying code */
 #define NO_SELF_MODIFYING_CODE
 FixupDAR:/* Entry point for dcbx workaround. */
+#ifdef CONFIG_8xx_CPU6
+   stw r3, 8(r0)
+#endif
+   mtspr   SPRN_SPRG_SCRATCH2, r10
/* fetch instruction from memory. */
mfspr   r10, SPRN_SRR0
andis.  r11, r10, 0x8000/* Address >= 0x8000 */
@@ -540,6 +536,9 @@ FixupDAR:/* Entry point for dcbx workaround. */
mtspr   SPRN_MD_TWC, r11/* Load pte table base address */
mfspr   r11, SPRN_MD_TWC/* and get the pte address */
lwz r11, 0(r11) /* Get the pte */
+#ifdef CONFIG_8xx_CPU6
+   lwz r3, 8(r0)   /* restore r3 from memory */
+#endif
/* concat physical page address(r11) and page offset(r10) */
rlwimi  r11, r10, 0, 20, 31
lwz r11,0(r11)
@@ -560,15 +559,13 @@ FixupDAR:/* Entry point for dcbx workaround. */
beq+142f
cmpwi   cr0, r10, 1964  /* Is icbi? */
beq+142f
-141:   b   DARFixed/* Nope, go back to normal TLB processing */
+141:   mfspr   r10,SPRN_SPRG_SCRATCH2
+   b   DARFixed/* Nope, go back to normal TLB processing */
 
 144:   mfspr   r10, SPRN_DSISR
rlwinm  r10, r10,0,7,5  /* Clear store bit for buggy dcbst insn */
mtspr   SPRN_DSISR, r10
 142:   /* continue, it was a dcbx, dcbi instruction. */
-#ifdef CONFIG_8xx_CPU6
-   lwz r3, 8(r0)   /* restore r3 from memory */
-#endif
 #ifndef NO_SELF_MODIFYING_CODE
andis.  r10,r11,0x1f/* test if reg RA is r0 */
li  r10,modified_instr@l
@@ -587,6 +584,7 @@ modified_instr:
bne+143f
subfr10,r0,r10  /* r10=r10-r0, only if reg RA is r0 */
 143:   mtdar   r10 /* store faulting EA in DAR */
+   mfspr   r10,SPRN_SPRG_SCRATCH2
b   DARFixed/* Go back to normal TLB handling */
 #else
mfctr   r10
@@ -640,6 +638,7 @@ modified_instr:
mfdar   r11
mtctr   r11 /* restore ctr reg from DAR */
mtdar   r10 /* save fault EA to DAR */
+   mfspr   r10,SPRN_SPRG_SCRATCH2
b   DARFixed/* Go back to normal TLB handling */
 
/* special handling for r10,r11 since these are modified already */
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 15/19] powerpc/8xx: Implement 16k pages

2014-08-29 Thread Christophe Leroy
This patch activates the handling of 16k pages on the MPC8xx.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/Kconfig   |2 +-
 arch/powerpc/include/asm/mmu-8xx.h |2 ++
 arch/powerpc/kernel/head_8xx.S |4 
 3 files changed, 7 insertions(+), 1 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5f44d3b..dc5f64e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -518,7 +518,7 @@ config PPC_4K_PAGES
bool "4k page size"
 
 config PPC_16K_PAGES
-   bool "16k page size" if 44x
+   bool "16k page size" if 44x || PPC_8xx
 
 config PPC_64K_PAGES
bool "64k page size" if 44x || PPC_STD_MMU_64 || PPC_BOOK3E_64
diff --git a/arch/powerpc/include/asm/mmu-8xx.h 
b/arch/powerpc/include/asm/mmu-8xx.h
index 3d11d3c..986b9e1 100644
--- a/arch/powerpc/include/asm/mmu-8xx.h
+++ b/arch/powerpc/include/asm/mmu-8xx.h
@@ -56,6 +56,7 @@
  * additional information from the MI_EPN, and MI_TWC registers.
  */
 #define SPRN_MI_RPN790
+#define MI_SPS16K  0x0008  /* Small page size (0 = 4k, 1 = 16k) */
 
 /* Define an RPN value for mapping kernel memory to large virtual
  * pages for boot initialization.  This has real page number of 0,
@@ -129,6 +130,7 @@
  * additional information from the MD_EPN, and MD_TWC registers.
  */
 #define SPRN_MD_RPN798
+#define MD_SPS16K  0x0008  /* Small page size (0 = 4k, 1 = 16k) */
 
 /* This is a temporary storage register that could be used to save
  * a processor working register during a tablewalk.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 8966262..4dd6be0 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -45,7 +45,11 @@
  * Value for the bits that have fixed value in RPN entries.
  * Also used for tagging DAR for DTLBerror.
  */
+#ifdef CONFIG_PPC_16K_PAGES
+#define RPN_PATTERN(0x00f0 | MD_SPS16K)
+#else
 #define RPN_PATTERN0x00f0
+#endif
 
__HEAD
 _ENTRY(_stext);
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 18/19] powerpc/8xx: _PMD_PRESENT already set in level 1 entries

2014-08-29 Thread Christophe Leroy
When a PMD entry is valid, _PMD_PRESENT is set. Therefore, forcing that bit
during TLB loading is useless.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 48d3de8..bb7c816 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -340,7 +340,6 @@ InstructionTLBMiss:
/* We have a pte table, so load the MI_TWC with the attributes
 * for this "segment."
 */
-   ori r11,r11,1   /* Set valid bit */
MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */
mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
/* Extract level 2 index */
@@ -417,7 +416,6 @@ DataStoreTLBMiss:
rlwimi  r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
 
-   ori r11, r11, 1 /* Set valid bit in physical L2 page */
/* Insert the Guarded flag into the TWC from the Linux PTE.
 * It is bit 27 of both the Linux PTE and the TWC (at least
 * I got that right :-).  It will be better when we can put
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 09/19] powerpc/8xx: Optimize verification in FixupDAR

2014-08-29 Thread Christophe Leroy
By XORing the upper part of the instruction code, we get a value that can
directly be verified with the second test and we can remove the first test.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |6 ++
 1 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index e5a250c..5037420 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -542,10 +542,8 @@ FixupDAR:/* Entry point for dcbx workaround. */
 /* Check if it really is a dcbx instruction. */
 /* dcbt and dcbtst does not generate DTLB Misses/Errors,
  * no need to include them here */
-   srwir10, r11, 26/* check if major OP code is 31 */
-   cmpwi   cr0, r10, 31
-   bne-141f
-   rlwinm  r10, r11, 0, 21, 30
+   xoris   r10, r11, 0x7c00/* check if major OP code is 31 */
+   rlwinm  r10, r10, 0, 21, 5
cmpwi   cr0, r10, 2028  /* Is dcbz? */
beq+142f
cmpwi   cr0, r10, 940   /* Is dcbi? */
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 19/19] powerpc/8xx: Don't restore regs to save them again.

2014-08-29 Thread Christophe Leroy
There is no need to restore the r10, r11 and cr registers at the end of the ITLBmiss
handler, as they are saved again to the same place in the ITLBError handler we are
jumping to.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |8 +---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index bb7c816..e21f0b2 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -381,8 +381,7 @@ InstructionTLBMiss:
lwz r3, 8(r0)
 #endif
mfspr   r10, SPRN_SPRG_SCRATCH2
-   EXCEPTION_EPILOG_0
-   b   InstructionTLBError
+   b   InstructionTLBError1
 
. = 0x1200
 DataStoreTLBMiss:
@@ -471,7 +470,10 @@ DataStoreTLBMiss:
  */
. = 0x1300
 InstructionTLBError:
-   EXCEPTION_PROLOG
+   EXCEPTION_PROLOG_0
+InstructionTLBError1:
+   EXCEPTION_PROLOG_1
+   EXCEPTION_PROLOG_2
mr  r4,r12
mr  r5,r9
EXC_XFER_LITE(0x1300, handle_page_fault)
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 17/19] powerpc/8xx: set PTE bit 22 off TLBmiss

2014-08-29 Thread Christophe Leroy
No need to re-set this bit at each TLB miss. Let's set it in the PTE.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/include/asm/pgtable-ppc32.h |   21 +
 arch/powerpc/include/asm/pte-8xx.h   |7 +--
 arch/powerpc/kernel/head_8xx.S   |   10 ++
 3 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h 
b/arch/powerpc/include/asm/pgtable-ppc32.h
index 47edde8..c261792 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -172,6 +172,26 @@ static inline unsigned long pte_update(pte_t *p,
 #ifdef PTE_ATOMIC_UPDATES
unsigned long old, tmp;
 
+#ifdef CONFIG_PPC_8xx
+   unsigned long tmp2;
+
+   __asm__ __volatile__("\
+1: lwarx   %0,0,%4\n\
+   andc%1,%0,%5\n\
+   or  %1,%1,%6\n\
+   /* 0x200 == Extended encoding, bit 22 */ \
+   /* Bit 22 has to be 1 if neither _PAGE_USER nor _PAGE_RW are set */ \
+   rlwimi  %1,%1,32-2,0x200\n /* get _PAGE_USER */ \
+   rlwinm  %3,%1,32-1,0x200\n /* get _PAGE_RW */ \
+   or  %1,%3,%1\n\
+   xori%1,%1,0x200\n"
+   PPC405_ERR77(0,%4)
+"  stwcx.  %1,0,%4\n\
+   bne-1b"
+   : "=&r" (old), "=&r" (tmp), "=m" (*p), "=&r" (tmp2)
+   : "r" (p), "r" (clr), "r" (set), "m" (*p)
+   : "cc" );
+#else /* CONFIG_PPC_8xx */
__asm__ __volatile__("\
 1: lwarx   %0,0,%3\n\
andc%1,%0,%4\n\
@@ -182,6 +202,7 @@ static inline unsigned long pte_update(pte_t *p,
: "=&r" (old), "=&r" (tmp), "=m" (*p)
: "r" (p), "r" (clr), "r" (set), "m" (*p)
: "cc" );
+#endif /* CONFIG_PPC_8xx */
 #else /* PTE_ATOMIC_UPDATES */
unsigned long old = pte_val(*p);
*p = __pte((old & ~clr) | set);
diff --git a/arch/powerpc/include/asm/pte-8xx.h 
b/arch/powerpc/include/asm/pte-8xx.h
index d44826e..dede1e7 100644
--- a/arch/powerpc/include/asm/pte-8xx.h
+++ b/arch/powerpc/include/asm/pte-8xx.h
@@ -48,19 +48,22 @@
  */
 #define _PAGE_RW   0x0400  /* lsb PP bits, inverted in HW */
 #define _PAGE_USER 0x0800  /* msb PP bits */
+/* set when neither _PAGE_USER nor _PAGE_RW are set */
+#define _PAGE_KNLRO0x0200
 
 #define _PMD_PRESENT   0x0001
 #define _PMD_BAD   0x0ff0
 #define _PMD_PAGE_MASK 0x000c
 #define _PMD_PAGE_8M   0x000c
 
-#define _PTE_NONE_MASK _PAGE_ACCESSED
+#define _PTE_NONE_MASK (_PAGE_ACCESSED | _PAGE_KNLRO)
 
 /* Until my rework is finished, 8xx still needs atomic PTE updates */
 #define PTE_ATOMIC_UPDATES 1
 
 /* We need to add _PAGE_SHARED to kernel pages */
-#define _PAGE_KERNEL_RO(_PAGE_SHARED)
+#define _PAGE_KERNEL_RO(_PAGE_SHARED | _PAGE_KNLRO)
+#define _PAGE_KERNEL_ROX   (_PAGE_EXEC | _PAGE_KNLRO)
 #define _PAGE_KERNEL_RW(_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index a7af26e..48d3de8 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -445,14 +445,8 @@ DataStoreTLBMiss:
and r11, r11, r10
rlwimi  r10, r11, 0, _PAGE_PRESENT
 #endif
-   /* Honour kernel RO, User NA */
-   /* 0x200 == Extended encoding, bit 22 */
-   rlwimi  r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */
-   /* r11 =  (r10 & _PAGE_RW) >> 1 */
-   rlwinm  r11, r10, 32-1, 0x200
-   or  r10, r11, r10
-   /* invert RW and 0x200 bits */
-   xorir10, r10, _PAGE_RW | 0x200
+   /* invert RW */
+   xorir10, r10, _PAGE_RW
 
/* The Linux PTE won't go exactly into the MMU TLB.
 * Software indicator bits 22 and 28 must be clear.
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 12/19] powerpc/8xx: Don't use MD_TWC for walk

2014-08-29 Thread Christophe Leroy
MD_TWC can only be used properly with 4k pages.
So let's calculate the level 2 table index ourselves.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |   28 
 1 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index ad15070..0f571f5 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -297,8 +297,6 @@ InstructionTLBMiss:
addir11, r10, -0x1000
tlbie   r11
 #endif
-   DO_8xx_CPU6(0x3780, r3)
-   mtspr   SPRN_MD_EPN, r10/* Have to use MD_EPN for walk, MI_EPN 
can't */
 
/* If we are faulting a kernel address, we have to use the
 * kernel page tables.
@@ -326,10 +324,9 @@ InstructionTLBMiss:
ori r11,r11,1   /* Set valid bit */
DO_8xx_CPU6(0x2b80, r3)
mtspr   SPRN_MI_TWC, r11/* Set segment attributes */
-   DO_8xx_CPU6(0x3b80, r3)
-   mtspr   SPRN_MD_TWC, r11/* Load pte table base address */
-   mfspr   r11, SPRN_MD_TWC/* and get the pte address */
-   lwz r10, 0(r11) /* Get the pte */
+   mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
+   rlwinm  r11, r11, 22, 20, 29/* Extract level 2 index */
+   lwzxr10, r10, r11   /* Get the pte */
 
 #ifdef CONFIG_SWAP
andi.   r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT
@@ -395,12 +392,13 @@ DataStoreTLBMiss:
 
/* We have a pte table, so load fetch the pte from the table.
 */
-   ori r11, r11, 1 /* Set valid bit in physical L2 page */
-   DO_8xx_CPU6(0x3b80, r3)
-   mtspr   SPRN_MD_TWC, r11/* Load pte table base address */
-   mfspr   r10, SPRN_MD_TWC/* and get the pte address */
+   mfspr   r10, SPRN_MD_EPN/* Get address of fault */
+   /* Extract level 2 index */
+   rlwinm  r10, r10, 22, 20, 29
+   rlwimi  r10, r11, 0, 0, 19  /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
 
+   ori r11, r11, 1 /* Set valid bit in physical L2 page */
/* Insert the Guarded flag into the TWC from the Linux PTE.
 * It is bit 27 of both the Linux PTE and the TWC (at least
 * I got that right :-).  It will be better when we can put
@@ -524,18 +522,16 @@ FixupDAR:/* Entry point for dcbx workaround. */
/* fetch instruction from memory. */
mfspr   r10, SPRN_SRR0
andis.  r11, r10, 0x8000/* Address >= 0x8000 */
-   DO_8xx_CPU6(0x3780, r3)
-   mtspr   SPRN_MD_EPN, r10
mfspr   r11, SPRN_M_TW  /* Get level 1 table base address */
beq-3f  /* Branch if user space */
lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
 3: rlwinm  r10, r10, 12, 20, 29/* Extract level 1 index */
lwzxr11, r10, r11   /* Get the level 1 entry */
-   DO_8xx_CPU6(0x3b80, r3)
-   mtspr   SPRN_MD_TWC, r11/* Load pte table base address */
-   mfspr   r11, SPRN_MD_TWC/* and get the pte address */
-   lwz r11, 0(r11) /* Get the pte */
+   rlwinm  r10, r11,0,0,19 /* Extract page descriptor page address */
+   mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
+   rlwinm  r11, r11, 22, 20, 29/* Extract level 2 index */
+   lwzxr11, r10, r11   /* Get the pte */
 #ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)   /* restore r3 from memory */
 #endif
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 08/19] powerpc/8xx: No need to restore registers and save them again.

2014-08-29 Thread Christophe Leroy
In the DTLBError handler there is no need to restore the r10, r11 and cr registers
after fixing DAR, as they are saved again to the same place just after.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 5f04d5f..e5a250c 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -478,8 +478,8 @@ DataTLBError:
cmpwi   cr0, r11, 0x00f0
beq-FixupDAR/* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
-   EXCEPTION_EPILOG_0
-   EXCEPTION_PROLOG
+   EXCEPTION_PROLOG_1
+   EXCEPTION_PROLOG_2
mfspr   r10,SPRN_DSISR
stw r10,_DSISR(r11)
mr  r5,r10
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 07/19] powerpc/8xx: DataAccess exception not generated by MPC8xx

2014-08-29 Thread Christophe Leroy
DataAccess exception is never generated by MPC8xx so do the job directly where
it is used to avoid an unnecessary branching.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |   23 ++-
 1 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 845abf8..5f04d5f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -219,19 +219,9 @@ MachineCheck:
EXC_XFER_STD(0x200, machine_check_exception)
 
 /* Data access exception.
- * This is "never generated" by the MPC8xx.  We jump to it for other
- * translation errors.
+ * This is "never generated" by the MPC8xx.
  */
-   . = 0x300
-DataAccess:
-   EXCEPTION_PROLOG
-   mfspr   r10,SPRN_DSISR
-   stw r10,_DSISR(r11)
-   mr  r5,r10
-   mfspr   r4,SPRN_DAR
-   li  r10,0x00f0
-   mtspr   SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */
-   EXC_XFER_LITE(0x300, handle_page_fault)
+   EXCEPTION(0x300, DataAccess, unknown_exception, EXC_XFER_STD)
 
 /* Instruction access exception.
  * This is "never generated" by the MPC8xx.
@@ -489,7 +479,14 @@ DataTLBError:
beq-FixupDAR/* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
EXCEPTION_EPILOG_0
-   b   DataAccess
+   EXCEPTION_PROLOG
+   mfspr   r10,SPRN_DSISR
+   stw r10,_DSISR(r11)
+   mr  r5,r10
+   mfspr   r4,SPRN_DAR
+   li  r10,0x00f0
+   mtspr   SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */
+   EXC_XFER_LITE(0x1400, handle_page_fault)
 
EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 13/19] powerpc/8xx: Use PAGE size related consts

2014-08-29 Thread Christophe Leroy
For PAGE size related operations, use PAGE size consts in order to be able to
use a different page size in the future.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |   30 ++
 1 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 0f571f5..dcaee9f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -292,9 +292,9 @@ InstructionTLBMiss:
mtspr   SPRN_SPRG_SCRATCH2, r10
mfspr   r10, SPRN_SRR0  /* Get effective address of fault */
 #ifdef CONFIG_8xx_CPU15
-   addir11, r10, 0x1000
+   addir11, r10, PAGE_SIZE
tlbie   r11
-   addir11, r10, -0x1000
+   addir11, r10, -PAGE_SIZE
tlbie   r11
 #endif
 
@@ -313,7 +313,8 @@ InstructionTLBMiss:
ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
 3:
 #endif
-   rlwinm  r10, r10, 12, 20, 29/* Extract level 1 index */
+   /* Extract level 1 index */
+   rlwinm  r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
lwzxr11, r10, r11   /* Get the level 1 entry */
rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
beq 2f  /* If zero, don't try to find a pte */
@@ -325,7 +326,8 @@ InstructionTLBMiss:
DO_8xx_CPU6(0x2b80, r3)
mtspr   SPRN_MI_TWC, r11/* Set segment attributes */
mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
-   rlwinm  r11, r11, 22, 20, 29/* Extract level 2 index */
+   /* Extract level 2 index */
+   rlwinm  r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
lwzxr10, r10, r11   /* Get the pte */
 
 #ifdef CONFIG_SWAP
@@ -385,7 +387,8 @@ DataStoreTLBMiss:
lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
 3:
-   rlwinm  r10, r10, 12, 20, 29/* Extract level 1 index */
+   /* Extract level 1 index */
+   rlwinm  r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
lwzxr11, r10, r11   /* Get the level 1 entry */
rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */
beq 2f  /* If zero, don't try to find a pte */
@@ -394,8 +397,8 @@ DataStoreTLBMiss:
 */
mfspr   r10, SPRN_MD_EPN/* Get address of fault */
/* Extract level 2 index */
-   rlwinm  r10, r10, 22, 20, 29
-   rlwimi  r10, r11, 0, 0, 19  /* Add level 2 base */
+   rlwinm  r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
+   rlwimi  r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
 
ori r11, r11, 1 /* Set valid bit in physical L2 page */
@@ -526,18 +529,20 @@ FixupDAR:/* Entry point for dcbx workaround. */
beq-3f  /* Branch if user space */
lis r11, (swapper_pg_dir-PAGE_OFFSET)@h
ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l
-3: rlwinm  r10, r10, 12, 20, 29/* Extract level 1 index */
+   /* Extract level 1 index */
+3: rlwinm  r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 
29
lwzxr11, r10, r11   /* Get the level 1 entry */
rlwinm  r10, r11,0,0,19 /* Extract page descriptor page address */
mfspr   r11, SPRN_SRR0  /* Get effective address of fault */
-   rlwinm  r11, r11, 22, 20, 29/* Extract level 2 index */
+   /* Extract level 2 index */
+   rlwinm  r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
lwzxr11, r10, r11   /* Get the pte */
 #ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)   /* restore r3 from memory */
 #endif
/* concat physical page address(r11) and page offset(r10) */
mfspr   r10, SPRN_SRR0
-   rlwimi  r11, r10, 0, 20, 31
+   rlwimi  r11, r10, 0, 32 - PAGE_SHIFT, 31
lwz r11,0(r11)
 /* Check if it really is a dcbx instruction. */
 /* dcbt and dcbtst does not generate DTLB Misses/Errors,
@@ -913,12 +918,13 @@ set_dec_cpu6:
.globl  sdata
 sdata:
.globl  empty_zero_page
+   .align  PAGE_SHIFT
 empty_zero_page:
-   .space  4096
+   .space  PAGE_SIZE
 
.globl  swapper_pg_dir
 swapper_pg_dir:
-   .space  4096
+   .space  PGD_TABLE_SIZE
 
 /* Room for two PTE table poiners, usually the kernel and current user
  * pointer to their respective root page table (pgdir).
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 14/19] powerpc/8xx: Const for TLB RPN forced value

2014-08-29 Thread Christophe Leroy
Value 0x00f0 is used to force bits in the TLB level 2 entry. This value is linked
to the page size and will vary when we change the page size. Let's define a const
for it in order to have it in only one place.

Signed-off-by: Christophe Leroy 

---
 arch/powerpc/kernel/head_8xx.S |   19 +--
 1 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index dcaee9f..8966262 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -40,6 +40,13 @@
 #else
 #define DO_8xx_CPU6(val, reg)
 #endif
+
+/*
+ * Value for the bits that have fixed value in RPN entries.
+ * Also used for tagging DAR for DTLBerror.
+ */
+#define RPN_PATTERN0x00f0
+
__HEAD
 _ENTRY(_stext);
 _ENTRY(_start);
@@ -211,7 +218,7 @@ MachineCheck:
EXCEPTION_PROLOG
mfspr r4,SPRN_DAR
stw r4,_DAR(r11)
-   li r5,0x00f0
+   li r5,RPN_PATTERN
mtspr SPRN_DAR,r5   /* Tag DAR, to be used in DTLB Error */
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
@@ -237,7 +244,7 @@ Alignment:
EXCEPTION_PROLOG
mfspr   r4,SPRN_DAR
stw r4,_DAR(r11)
-   li  r5,0x00f0
+   li  r5,RPN_PATTERN
mtspr   SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */
mfspr   r5,SPRN_DSISR
stw r5,_DSISR(r11)
@@ -341,7 +348,7 @@ InstructionTLBMiss:
 * set.  All other Linux PTE bits control the behavior
 * of the MMU.
 */
-   li  r11, 0x00f0
+   li  r11, RPN_PATTERN
rlwimi  r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */
DO_8xx_CPU6(0x2d80, r3)
mtspr   SPRN_MI_RPN, r10/* Update TLB entry */
@@ -445,7 +452,7 @@ DataStoreTLBMiss:
 * set.  All other Linux PTE bits control the behavior
 * of the MMU.
 */
-2: li  r11, 0x00f0
+2: li  r11, RPN_PATTERN
rlwimi  r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
DO_8xx_CPU6(0x3d80, r3)
mtspr   SPRN_MD_RPN, r10/* Update TLB entry */
@@ -479,7 +486,7 @@ DataTLBError:
EXCEPTION_PROLOG_0
 
mfspr   r11, SPRN_DAR
-   cmpwi   cr0, r11, 0x00f0
+   cmpwi   cr0, r11, RPN_PATTERN
beq-FixupDAR/* must be a buggy dcbX, icbi insn. */
 DARFixed:/* Return from dcbx instruction bug workaround */
EXCEPTION_PROLOG_1
@@ -488,7 +495,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */
stw r10,_DSISR(r11)
mr  r5,r10
mfspr   r4,SPRN_DAR
-   li  r10,0x00f0
+   li  r10,RPN_PATTERN
mtspr   SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */
EXC_XFER_LITE(0x1400, handle_page_fault)
 
-- 
1.7.1

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

  1   2   3   4   5   6   7   8   9   10   >