[PATCH v3 01/17] crypto: talitos - Use zero entry to init descriptors ptrs to zero
Do use zero_entry value to init the descriptors ptrs to zero instead of writing 0 in each field Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 857414a..7bf1b2b 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1373,9 +1373,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, int sg_count, ret; /* first DWORD empty */ - desc->ptr[0].len = 0; - to_talitos_ptr(&desc->ptr[0], 0); - desc->ptr[0].j_extent = 0; + desc->ptr[0] = zero_entry; /* cipher iv */ to_talitos_ptr(&desc->ptr[1], edesc->iv_dma); @@ -1445,9 +1443,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, DMA_FROM_DEVICE); /* last DWORD empty */ - desc->ptr[6].len = 0; - to_talitos_ptr(&desc->ptr[6], 0); - desc->ptr[6].j_extent = 0; + desc->ptr[6] = zero_entry; ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 00/17] crypto: talitos - Add support for SEC1
The purpose of this set of patchs is to add to talitos crypto driver the support for the SEC1 version of the security engine, which is found in mpc885 and mpc8272 processors. v3 is a complete rework of the patchset. Since a kernel can be built with support for both MPC82xx and MPC83xx at the same time, talitos driver shall support both SEC1 and SEC2+ at the same time. Based on cryptodev-2.6 tree Christophe Leroy (17): crypto: talitos - Use zero entry to init descriptors ptrs to zero crypto: talitos - Refactor the sg in/out chain allocation crypto: talitos - talitos_ptr renamed ptr for more lisibility crypto: talitos - Add a helper function to clear j_extent field crypto: talitos - remove param 'extent' in map_single_talitos_ptr() crypto: talitos - helper function for ptr len crypto: talitos - enhanced talitos_desc struct for SEC1 crypto: talitos - add sub-choice in talitos CONFIG for SEC1 crypto: talitos - Add a feature to tag SEC1 crypto: talitos - fill in talitos descriptor iaw SEC1 or SEC2+ crypto: talitos - adaptation of talitos_submit() for SEC1 crypto: talitos - base address for Execution Units crypto: talitos - adapt interrupts and reset functions to SEC1 crypto: talitos - implement scatter/gather copy for SEC1 crypto: talitos - SEC1 bugs on 0 data hash crypto: talitos - Add fsl,sec1.0 compatible crypto: talitos - Update DT bindings with SEC1 .../devicetree/bindings/crypto/fsl-sec2.txt| 6 +- drivers/crypto/Kconfig | 18 + drivers/crypto/talitos.c | 727 +++-- drivers/crypto/talitos.h | 153 +++-- 4 files changed, 644 insertions(+), 260 deletions(-) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 03/17] crypto: talitos - talitos_ptr renamed ptr for more lisibility
Linux CodyingStyle recommends to use short variables for local variables. ptr is just good enough for those 3 lines functions. It helps keep single lines shorter than 80 characters. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 5a7e345..fca0aed 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -55,37 +55,37 @@ #include "talitos.h" -static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t dma_addr) +static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr) { - talitos_ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr)); - talitos_ptr->eptr = upper_32_bits(dma_addr); + ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr)); + ptr->eptr = upper_32_bits(dma_addr); } /* * map virtual single (contiguous) pointer to h/w descriptor pointer */ static void map_single_talitos_ptr(struct device *dev, - struct talitos_ptr *talitos_ptr, + struct talitos_ptr *ptr, unsigned short len, void *data, unsigned char extent, enum dma_data_direction dir) { dma_addr_t dma_addr = dma_map_single(dev, data, len, dir); - talitos_ptr->len = cpu_to_be16(len); - to_talitos_ptr(talitos_ptr, dma_addr); - talitos_ptr->j_extent = extent; + ptr->len = cpu_to_be16(len); + to_talitos_ptr(ptr, dma_addr); + ptr->j_extent = extent; } /* * unmap bus single (contiguous) h/w descriptor pointer */ static void unmap_single_talitos_ptr(struct device *dev, -struct talitos_ptr *talitos_ptr, +struct talitos_ptr *ptr, enum dma_data_direction dir) { - dma_unmap_single(dev, be32_to_cpu(talitos_ptr->ptr), -be16_to_cpu(talitos_ptr->len), dir); + dma_unmap_single(dev, be32_to_cpu(ptr->ptr), +be16_to_cpu(ptr->len), dir); } static int reset_channel(struct device *dev, int ch) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 02/17] crypto: talitos - Refactor the sg in/out chain allocation
This patch refactors the handling of the input and output data that is quite similar in several functions Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 159 --- 1 file changed, 81 insertions(+), 78 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 7bf1b2b..5a7e345 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1327,16 +1327,23 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, return 0; } +static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, +struct scatterlist *dst, unsigned int len, +struct talitos_edesc *edesc) +{ + talitos_sg_unmap(dev, edesc, src, dst); +} + static void common_nonsnoop_unmap(struct device *dev, struct talitos_edesc *edesc, struct ablkcipher_request *areq) { unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); + + unmap_sg_talitos_ptr(dev, areq->src, areq->dst, areq->nbytes, edesc); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); - if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); @@ -1358,6 +1365,65 @@ static void ablkcipher_done(struct device *dev, areq->base.complete(&areq->base, err); } +int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, + unsigned int len, struct talitos_edesc *edesc, + enum dma_data_direction dir, struct talitos_ptr *ptr) +{ + int sg_count; + + ptr->len = cpu_to_be16(len); + ptr->j_extent = 0; + + sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir, + edesc->src_chained); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(src)); + } else { + sg_count = sg_to_link_tbl(src, sg_count, len, + &edesc->link_tbl[0]); + if (sg_count > 1) { + to_talitos_ptr(ptr, edesc->dma_link_tbl); + ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); + } else { + /* Only one segment now, so no link tbl needed */ + to_talitos_ptr(ptr, sg_dma_address(src)); + } + } + return sg_count; +} + +void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, + unsigned int len, struct talitos_edesc *edesc, + enum dma_data_direction dir, + struct talitos_ptr *ptr, int sg_count) +{ + ptr->len = cpu_to_be16(len); + ptr->j_extent = 0; + + if (dir != DMA_NONE) + sg_count = talitos_map_sg(dev, dst, edesc->dst_nents ? : 1, + dir, edesc->dst_chained); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(dst)); + } else { + struct talitos_ptr *link_tbl_ptr = + &edesc->link_tbl[edesc->src_nents + 1]; + + to_talitos_ptr(ptr, edesc->dma_link_tbl + + (edesc->src_nents + 1) * + sizeof(struct talitos_ptr)); + ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; + sg_count = sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); + } +} + static int common_nonsnoop(struct talitos_edesc *edesc, struct ablkcipher_request *areq, void (*callback) (struct device *dev, @@ -1387,56 +1453,16 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* * cipher in */ - desc->ptr[3].len = cpu_to_be16(cryptlen); - desc->ptr[3].j_extent = 0; - - sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE, - edesc->src_chained);
[PATCH v3 06/17] crypto: talitos - helper function for ptr len
This patch adds a helper function for reads and writes of the len param of the talitos descriptor. This will help implement SEC1 later. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 24 +--- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 81e5636..bca6ded 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -61,6 +61,16 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr) ptr->eptr = upper_32_bits(dma_addr); } +static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned short len) +{ + ptr->len = cpu_to_be16(len); +} + +static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr) +{ + return be16_to_cpu(ptr->len); +} + static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr) { ptr->j_extent = 0; @@ -76,7 +86,7 @@ static void map_single_talitos_ptr(struct device *dev, { dma_addr_t dma_addr = dma_map_single(dev, data, len, dir); - ptr->len = cpu_to_be16(len); + to_talitos_ptr_len(ptr, len); to_talitos_ptr(ptr, dma_addr); to_talitos_ptr_extent_clear(ptr); } @@ -89,7 +99,7 @@ static void unmap_single_talitos_ptr(struct device *dev, enum dma_data_direction dir) { dma_unmap_single(dev, be32_to_cpu(ptr->ptr), -be16_to_cpu(ptr->len), dir); +from_talitos_ptr_len(ptr), dir); } static int reset_channel(struct device *dev, int ch) @@ -1375,7 +1385,7 @@ int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, { int sg_count; - ptr->len = cpu_to_be16(len); + to_talitos_ptr_len(ptr, len); to_talitos_ptr_extent_clear(ptr); sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir, @@ -1405,7 +1415,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, enum dma_data_direction dir, struct talitos_ptr *ptr, int sg_count) { - ptr->len = cpu_to_be16(len); + to_talitos_ptr_len(ptr, len); to_talitos_ptr_extent_clear(ptr); if (dir != DMA_NONE) @@ -1447,7 +1457,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* cipher iv */ to_talitos_ptr(&desc->ptr[1], edesc->iv_dma); - desc->ptr[1].len = cpu_to_be16(ivsize); + to_talitos_ptr_len(&desc->ptr[1], ivsize); to_talitos_ptr_extent_clear(&desc->ptr[1]); /* cipher key */ @@ -1539,11 +1549,11 @@ static void common_nonsnoop_hash_unmap(struct device *dev, unmap_sg_talitos_ptr(dev, req_ctx->psrc, NULL, 0, edesc); /* When using hashctx-in, must unmap it. */ - if (edesc->desc.ptr[1].len) + if (from_talitos_ptr_len(&edesc->desc.ptr[1])) unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); - if (edesc->desc.ptr[2].len) + if (from_talitos_ptr_len(&edesc->desc.ptr[2])) unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 05/17] crypto: talitos - remove param 'extent' in map_single_talitos_ptr()
map_single_talitos_ptr() is always called with extent == 0, so lets remove this unused parameter Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 21 ++--- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index c93f79b..81e5636 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -72,14 +72,13 @@ static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr) static void map_single_talitos_ptr(struct device *dev, struct talitos_ptr *ptr, unsigned short len, void *data, - unsigned char extent, enum dma_data_direction dir) { dma_addr_t dma_addr = dma_map_single(dev, data, len, dir); ptr->len = cpu_to_be16(len); to_talitos_ptr(ptr, dma_addr); - ptr->j_extent = extent; + to_talitos_ptr_extent_clear(ptr); } /* @@ -958,7 +957,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, /* hmac key */ map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key, - 0, DMA_TO_DEVICE); + DMA_TO_DEVICE); /* hmac data */ desc->ptr[1].len = cpu_to_be16(areq->assoclen + ivsize); @@ -1002,7 +1001,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, - (char *)&ctx->key + ctx->authkeylen, 0, + (char *)&ctx->key + ctx->authkeylen, DMA_TO_DEVICE); /* @@ -1080,7 +1079,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, } /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, 0, + map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, DMA_FROM_DEVICE); ret = talitos_submit(dev, ctx->ch, desc, callback, areq); @@ -1453,7 +1452,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, - (char *)&ctx->key, 0, DMA_TO_DEVICE); + (char *)&ctx->key, DMA_TO_DEVICE); /* * cipher in @@ -1470,7 +1469,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, &desc->ptr[4], sg_count); /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, 0, + map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, DMA_FROM_DEVICE); /* last DWORD empty */ @@ -1595,7 +1594,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (!req_ctx->first || req_ctx->swinit) { map_single_talitos_ptr(dev, &desc->ptr[1], req_ctx->hw_context_size, - (char *)req_ctx->hw_context, 0, + (char *)req_ctx->hw_context, DMA_TO_DEVICE); req_ctx->swinit = 0; } else { @@ -1607,7 +1606,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, /* HMAC key */ if (ctx->keylen) map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, - (char *)&ctx->key, 0, DMA_TO_DEVICE); + (char *)&ctx->key, DMA_TO_DEVICE); else desc->ptr[2] = zero_entry; @@ -1624,11 +1623,11 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (req_ctx->last) map_single_talitos_ptr(dev, &desc->ptr[5], crypto_ahash_digestsize(tfm), - areq->result, 0, DMA_FROM_DEVICE); + areq->result, DMA_FROM_DEVICE); else map_single_talitos_ptr(dev, &desc->ptr[5], req_ctx->hw_context_size, - req_ctx->hw_context, 0, DMA_FROM_DEVICE); + req_ctx->hw_context, DMA_FROM_DEVICE); /* last DWORD empty */ desc->ptr[6] = zero_entry; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 04/17] crypto: talitos - Add a helper function to clear j_extent field
j_extent field is specific to SEC2 so we add a helper function to clear it so that SEC1 can redefine that function as nop Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index fca0aed..c93f79b 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -61,6 +61,11 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr) ptr->eptr = upper_32_bits(dma_addr); } +static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr) +{ + ptr->j_extent = 0; +} + /* * map virtual single (contiguous) pointer to h/w descriptor pointer */ @@ -1372,7 +1377,7 @@ int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, int sg_count; ptr->len = cpu_to_be16(len); - ptr->j_extent = 0; + to_talitos_ptr_extent_clear(ptr); sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir, edesc->src_chained); @@ -1402,7 +1407,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, struct talitos_ptr *ptr, int sg_count) { ptr->len = cpu_to_be16(len); - ptr->j_extent = 0; + to_talitos_ptr_extent_clear(ptr); if (dir != DMA_NONE) sg_count = talitos_map_sg(dev, dst, edesc->dst_nents ? : 1, @@ -1444,7 +1449,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* cipher iv */ to_talitos_ptr(&desc->ptr[1], edesc->iv_dma); desc->ptr[1].len = cpu_to_be16(ivsize); - desc->ptr[1].j_extent = 0; + to_talitos_ptr_extent_clear(&desc->ptr[1]); /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 07/17] crypto: talitos - enhanced talitos_desc struct for SEC1
This patch enhances the talitos_desc struct with fields for SEC1. SEC1 has only one header field, and has a 'next_desc' field in addition. This mixed descriptor will continue to fit SEC2, and for SEC1 we will recopy hdr value into hdr1 value in talitos_submit() Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.h | 20 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 61a1405..f078da1 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -37,9 +37,17 @@ /* descriptor pointer entry */ struct talitos_ptr { - __be16 len; /* length */ - u8 j_extent;/* jump to sg link table and/or extent */ - u8 eptr;/* extended address */ + union { + struct {/* SEC2 format */ + __be16 len; /* length */ + u8 j_extent;/* jump to sg link table and/or extent*/ + u8 eptr;/* extended address */ + }; + struct {/* SEC1 format */ + __be16 res; + __be16 len1;/* length */ + }; + }; __be32 ptr; /* address */ }; @@ -53,8 +61,12 @@ static const struct talitos_ptr zero_entry = { /* descriptor */ struct talitos_desc { __be32 hdr; /* header high bits */ - __be32 hdr_lo; /* header low bits */ + union { + __be32 hdr_lo; /* header low bits */ + __be32 hdr1;/* header for SEC1 */ + }; struct talitos_ptr ptr[7]; /* ptr/len pair array */ + __be32 next_desc; /* next descriptor (SEC1) */ }; /** -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 08/17] crypto: talitos - add sub-choice in talitos CONFIG for SEC1
This patch adds a CONFIG option to select SEC1, SEC2+ or both. Signed-off-by: Christophe Leroy --- drivers/crypto/Kconfig | 18 ++ 1 file changed, 18 insertions(+) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 800bf41..8a76a01 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -222,6 +222,24 @@ config CRYPTO_DEV_TALITOS To compile this driver as a module, choose M here: the module will be called talitos. +config CRYPTO_DEV_TALITOS1 + bool "SEC1 (SEC 1.0 and SEC Lite 1.2)" + depends on CRYPTO_DEV_TALITOS + depends on PPC_8xx || PPC_82xx + default y + help + Say 'Y' here to use the Freescale Security Engine (SEC) version 1.0 + found on MPC82xx or the Freescale Security Engine (SEC Lite) + version 1.2 found on MPC8xx + +config CRYPTO_DEV_TALITOS2 + bool "SEC2+ (SEC version 2.0 or upper)" + depends on CRYPTO_DEV_TALITOS + default y if !PPC_8xx + help + Say 'Y' here to use the Freescale Security Engine (SEC) + version 2 and following as found on MPC83xx, MPC85xx, etc ... + config CRYPTO_DEV_IXP4XX tristate "Driver for IXP4xx crypto hardware acceleration" depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 09/17] crypto: talitos - Add a feature to tag SEC1
We add a new feature in the features field, to mark compatible "fsl,sec1.0" We also define a helper function called has_ftr_sec1() to help functions quickly determine if they are running on SEC1 or SEC2+. When only SEC1 or SEC2 is compiled in, has_ftr_sec1() return trivial corresponding value. If both are compiled in, feature field is checked. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 3 +++ drivers/crypto/talitos.h | 17 + 2 files changed, 20 insertions(+) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index bca6ded..db95023 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -2709,6 +2709,9 @@ static int talitos_probe(struct platform_device *ofdev) TALITOS_FTR_SHA224_HWINIT | TALITOS_FTR_HMAC_OK; + if (of_device_is_compatible(np, "fsl,sec1.0")) + priv->features |= TALITOS_FTR_SEC1; + priv->chan = kzalloc(sizeof(struct talitos_channel) * priv->num_channels, GFP_KERNEL); if (!priv->chan) { diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index f078da1..b0bdb4e 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -156,6 +156,23 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, #define TALITOS_FTR_HW_AUTH_CHECK 0x0002 #define TALITOS_FTR_SHA224_HWINIT 0x0004 #define TALITOS_FTR_HMAC_OK 0x0008 +#define TALITOS_FTR_SEC1 0x0010 + +/* + * If both CONFIG_CRYPTO_DEV_TALITOS1 and CONFIG_CRYPTO_DEV_TALITOS2 are + * defined, we check the features which are set according to the device tree. + * Otherwise, we answer true or false directly + */ +static inline bool has_ftr_sec1(struct talitos_private *priv) +{ +#if defined(CONFIG_CRYPTO_DEV_TALITOS1) && defined(CONFIG_CRYPTO_DEV_TALITOS2) + return priv->features & TALITOS_FTR_SEC1 ? true : false; +#elif defined(CONFIG_CRYPTO_DEV_TALITOS1) + return true; +#else + return false; +#endif +} /* * TALITOS_xxx_LO addresses point to the low data bits (32-63) of the register -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 10/17] crypto: talitos - fill in talitos descriptor iaw SEC1 or SEC2+
talitos descriptor is slightly different for SEC1 and SEC2+, so lets the helper function that fills the descriptor take into account the type of SEC. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 105 ++- 1 file changed, 67 insertions(+), 38 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index db95023..678b528 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -55,25 +55,38 @@ #include "talitos.h" -static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr) +static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr, + bool is_sec1) { ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr)); - ptr->eptr = upper_32_bits(dma_addr); + if (!is_sec1) + ptr->eptr = upper_32_bits(dma_addr); } -static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned short len) +static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned short len, + bool is_sec1) { - ptr->len = cpu_to_be16(len); + if (is_sec1) { + ptr->res = 0; + ptr->len1 = cpu_to_be16(len); + } else { + ptr->len = cpu_to_be16(len); + } } -static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr) +static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr, + bool is_sec1) { - return be16_to_cpu(ptr->len); + if (is_sec1) + return be16_to_cpu(ptr->len1); + else + return be16_to_cpu(ptr->len); } -static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr) +static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr, bool is_sec1) { - ptr->j_extent = 0; + if (!is_sec1) + ptr->j_extent = 0; } /* @@ -85,10 +98,12 @@ static void map_single_talitos_ptr(struct device *dev, enum dma_data_direction dir) { dma_addr_t dma_addr = dma_map_single(dev, data, len, dir); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); - to_talitos_ptr_len(ptr, len); - to_talitos_ptr(ptr, dma_addr); - to_talitos_ptr_extent_clear(ptr); + to_talitos_ptr_len(ptr, len, is_sec1); + to_talitos_ptr(ptr, dma_addr, is_sec1); + to_talitos_ptr_extent_clear(ptr, is_sec1); } /* @@ -98,8 +113,11 @@ static void unmap_single_talitos_ptr(struct device *dev, struct talitos_ptr *ptr, enum dma_data_direction dir) { + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + dma_unmap_single(dev, be32_to_cpu(ptr->ptr), -from_talitos_ptr_len(ptr), dir); +from_talitos_ptr_len(ptr, is_sec1), dir); } static int reset_channel(struct device *dev, int ch) @@ -922,7 +940,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, int n_sg = sg_count; while (n_sg--) { - to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg)); + to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg), 0); link_tbl_ptr->len = cpu_to_be16(sg_dma_len(sg)); link_tbl_ptr->j_extent = 0; link_tbl_ptr++; @@ -976,7 +994,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * - sizeof(struct talitos_ptr)); + sizeof(struct talitos_ptr), 0); desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP; /* assoc_nents - 1 entries for assoc, 1 for IV */ @@ -987,7 +1005,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, tbl_ptr += sg_count - 1; tbl_ptr->j_extent = 0; tbl_ptr++; - to_talitos_ptr(tbl_ptr, edesc->iv_dma); + to_talitos_ptr(tbl_ptr, edesc->iv_dma, 0); tbl_ptr->len = cpu_to_be16(ivsize); tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; @@ -996,14 +1014,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, } else { if (areq->assoclen) to_talitos_ptr(&desc->ptr[1], - sg_dma_address(areq->assoc)); + sg_dma_address(areq->assoc), 0); else - to_talitos_ptr(&desc->ptr[1], edesc->iv_dma)
[PATCH v3 11/17] crypto: talitos - adaptation of talitos_submit() for SEC1
SEC1 descriptor is a bit different to SEC2+ descriptor. talitos_submit() will have to copy hdr field into hdr1 field and send the descriptor starting at hdr1 up to next_desc. For SEC2, it remains unchanged and next_desc is just ignored. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 23 +++ drivers/crypto/talitos.h | 2 ++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 678b528..e6ea651 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -236,6 +236,7 @@ int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, struct talitos_request *request; unsigned long flags; int head; + bool is_sec1 = has_ftr_sec1(priv); spin_lock_irqsave(&priv->chan[ch].head_lock, flags); @@ -249,8 +250,17 @@ int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, request = &priv->chan[ch].fifo[head]; /* map descriptor and save caller data */ - request->dma_desc = dma_map_single(dev, desc, sizeof(*desc), - DMA_BIDIRECTIONAL); + if (is_sec1) { + desc->hdr1 = desc->hdr; + desc->next_desc = 0; + request->dma_desc = dma_map_single(dev, &desc->hdr1, + TALITOS_DESC_SIZE, + DMA_BIDIRECTIONAL); + } else { + request->dma_desc = dma_map_single(dev, desc, + TALITOS_DESC_SIZE, + DMA_BIDIRECTIONAL); + } request->callback = callback; request->context = context; @@ -282,16 +292,21 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) struct talitos_request *request, saved_req; unsigned long flags; int tail, status; + bool is_sec1 = has_ftr_sec1(priv); spin_lock_irqsave(&priv->chan[ch].tail_lock, flags); tail = priv->chan[ch].tail; while (priv->chan[ch].fifo[tail].desc) { + __be32 hdr; + request = &priv->chan[ch].fifo[tail]; /* descriptors with their done bits set don't get the error */ rmb(); - if ((request->desc->hdr & DESC_HDR_DONE) == DESC_HDR_DONE) + hdr = is_sec1 ? request->desc->hdr1 : request->desc->hdr; + + if ((hdr & DESC_HDR_DONE) == DESC_HDR_DONE) status = 0; else if (!error) @@ -300,7 +315,7 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) status = error; dma_unmap_single(dev, request->dma_desc, -sizeof(struct talitos_desc), +TALITOS_DESC_SIZE, DMA_BIDIRECTIONAL); /* copy entries so we can call callback outside lock */ diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index b0bdb4e..f827c04 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -69,6 +69,8 @@ struct talitos_desc { __be32 next_desc; /* next descriptor (SEC1) */ }; +#define TALITOS_DESC_SIZE (sizeof(struct talitos_desc) - sizeof(__be32)) + /** * talitos_request - descriptor submission request * @desc: descriptor pointer (kernel virtual) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 12/17] crypto: talitos - base address for Execution Units
SEC 1.0, 1.2 and 2.x+ have different EU base addresses, so we need to define pointers for each EU in the driver private data structure. The proper address is set by the probe function depending on the SEC type, in order to provide access to the proper address. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 83 drivers/crypto/talitos.h | 72 + 2 files changed, 100 insertions(+), 55 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index e6ea651..6d77699 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -208,7 +208,7 @@ static int init_device(struct device *dev) /* disable integrity check error interrupts (use writeback instead) */ if (priv->features & TALITOS_FTR_HW_AUTH_CHECK) - setbits32(priv->reg + TALITOS_MDEUICR_LO, + setbits32(priv->reg_mdeu + TALITOS_EUICR_LO, TALITOS_MDEUICR_LO_ICE); return 0; @@ -424,44 +424,44 @@ static void report_eu_error(struct device *dev, int ch, u32 desc_hdr) switch (desc_hdr & DESC_HDR_SEL0_MASK) { case DESC_HDR_SEL0_AFEU: dev_err(dev, "AFEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_AFEUISR), - in_be32(priv->reg + TALITOS_AFEUISR_LO)); + in_be32(priv->reg_afeu + TALITOS_EUISR), + in_be32(priv->reg_afeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_DEU: dev_err(dev, "DEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_DEUISR), - in_be32(priv->reg + TALITOS_DEUISR_LO)); + in_be32(priv->reg_deu + TALITOS_EUISR), + in_be32(priv->reg_deu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_MDEUA: case DESC_HDR_SEL0_MDEUB: dev_err(dev, "MDEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_MDEUISR), - in_be32(priv->reg + TALITOS_MDEUISR_LO)); + in_be32(priv->reg_mdeu + TALITOS_EUISR), + in_be32(priv->reg_mdeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_RNG: dev_err(dev, "RNGUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_RNGUISR), - in_be32(priv->reg + TALITOS_RNGUISR_LO)); + in_be32(priv->reg_rngu + TALITOS_ISR), + in_be32(priv->reg_rngu + TALITOS_ISR_LO)); break; case DESC_HDR_SEL0_PKEU: dev_err(dev, "PKEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_PKEUISR), - in_be32(priv->reg + TALITOS_PKEUISR_LO)); + in_be32(priv->reg_pkeu + TALITOS_EUISR), + in_be32(priv->reg_pkeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_AESU: dev_err(dev, "AESUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_AESUISR), - in_be32(priv->reg + TALITOS_AESUISR_LO)); + in_be32(priv->reg_aesu + TALITOS_EUISR), + in_be32(priv->reg_aesu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_CRCU: dev_err(dev, "CRCUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_CRCUISR), - in_be32(priv->reg + TALITOS_CRCUISR_LO)); + in_be32(priv->reg_crcu + TALITOS_EUISR), + in_be32(priv->reg_crcu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_KEU: dev_err(dev, "KEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_KEUISR), - in_be32(priv->reg + TALITOS_KEUISR_LO)); + in_be32(priv->reg_pkeu + TALITOS_EUISR), + in_be32(priv->reg_pkeu + TALITOS_EUISR_LO)); break; } @@ -469,13 +469,13 @@ static void report_eu_error(struct device *dev, int ch, u32 desc_hdr) case DESC_HDR_SEL1_MDEUA: case DESC_HDR_SEL1_MDEUB: dev_err(dev, "MDEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_MDEUISR), - in_be32(priv->reg + TALITOS_MDEUISR_LO)); + in_be32(priv->reg_mdeu + TALITOS_EUISR), + in_be32(priv->reg_mdeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL1_CRCU: dev_err(dev,
[PATCH v3 13/17] crypto: talitos - adapt interrupts and reset functions to SEC1
This patch adapts the interrupts handling and reset function for SEC1. On SEC1, registers are almost similar to SEC2+, but bits are sometimes located at different places. So we need to define TALITOS1 and TALITOS2 versions of some fields, and manage according to whether it is SEC1 or SEC2. On SEC1, only one interrupt vector is dedicated to the SEC, so only interrupt_4ch is needed. On SEC1, interrupts are enabled by clearing related bits in IMR, while on SEC2, interrupts are enabled by seting the bits in IMR. SEC1 also performs parity verification in the DES Unit. We have to disable this feature because the test vectors provided in the kernel have parity errors. In reset functions, only SEC2 supports continuation after error. For SEC1, we have to reset in all cases. For errors handling, SEC2+ names have been kept, but displayed text have been amended to reflect exact meaning on SEC1. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 227 +++ drivers/crypto/talitos.h | 39 +--- 2 files changed, 199 insertions(+), 67 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 6d77699..1265405 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -124,12 +124,23 @@ static int reset_channel(struct device *dev, int ch) { struct talitos_private *priv = dev_get_drvdata(dev); unsigned int timeout = TALITOS_TIMEOUT; + bool is_sec1 = has_ftr_sec1(priv); - setbits32(priv->chan[ch].reg + TALITOS_CCCR, TALITOS_CCCR_RESET); + if (is_sec1) { + setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO, + TALITOS1_CCCR_LO_RESET); - while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & TALITOS_CCCR_RESET) - && --timeout) - cpu_relax(); + while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR_LO) & + TALITOS1_CCCR_LO_RESET) && --timeout) + cpu_relax(); + } else { + setbits32(priv->chan[ch].reg + TALITOS_CCCR, + TALITOS2_CCCR_RESET); + + while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & + TALITOS2_CCCR_RESET) && --timeout) + cpu_relax(); + } if (timeout == 0) { dev_err(dev, "failed to reset channel %d\n", ch); @@ -152,11 +163,12 @@ static int reset_device(struct device *dev) { struct talitos_private *priv = dev_get_drvdata(dev); unsigned int timeout = TALITOS_TIMEOUT; - u32 mcr = TALITOS_MCR_SWR; + bool is_sec1 = has_ftr_sec1(priv); + u32 mcr = is_sec1 ? TALITOS1_MCR_SWR : TALITOS2_MCR_SWR; setbits32(priv->reg + TALITOS_MCR, mcr); - while ((in_be32(priv->reg + TALITOS_MCR) & TALITOS_MCR_SWR) + while ((in_be32(priv->reg + TALITOS_MCR) & mcr) && --timeout) cpu_relax(); @@ -180,6 +192,7 @@ static int init_device(struct device *dev) { struct talitos_private *priv = dev_get_drvdata(dev); int ch, err; + bool is_sec1 = has_ftr_sec1(priv); /* * Master reset @@ -203,8 +216,15 @@ static int init_device(struct device *dev) } /* enable channel done and error interrupts */ - setbits32(priv->reg + TALITOS_IMR, TALITOS_IMR_INIT); - setbits32(priv->reg + TALITOS_IMR_LO, TALITOS_IMR_LO_INIT); + if (is_sec1) { + clrbits32(priv->reg + TALITOS_IMR, TALITOS1_IMR_INIT); + clrbits32(priv->reg + TALITOS_IMR_LO, TALITOS1_IMR_LO_INIT); + /* disable parity error check in DEU (erroneous? test vect.) */ + setbits32(priv->reg_deu + TALITOS_EUICR, TALITOS1_DEUICR_KPE); + } else { + setbits32(priv->reg + TALITOS_IMR, TALITOS2_IMR_INIT); + setbits32(priv->reg + TALITOS_IMR_LO, TALITOS2_IMR_LO_INIT); + } /* disable integrity check error interrupts (use writeback instead) */ if (priv->features & TALITOS_FTR_HW_AUTH_CHECK) @@ -349,8 +369,37 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) /* * process completed requests for channels that have done status */ -#define DEF_TALITOS_DONE(name, ch_done_mask) \ -static void talitos_done_##name(unsigned long data)\ +#define DEF_TALITOS1_DONE(name, ch_done_mask) \ +static void talitos1_done_##name(unsigned long data) \ +{ \ + struct device *dev = (struct device *)data; \ + struct talitos_private *priv = dev_get_drvdata(dev);\ + unsigned long flags;
[PATCH v3 14/17] crypto: talitos - implement scatter/gather copy for SEC1
SEC1 doesn't support scatter/gather, SEC1 doesn't handle link tables. Therefore, for SEC1 we have to do it by SW. For that, we reserve space at the end of the extended descriptor, in lieu of the space reserved for the link tables on SEC2, and we perform sg_copy() when preparing the descriptors We also adapt the max buffer size which is only 32k on SEC1 while it is 64k on SEC2+ Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 138 ++- drivers/crypto/talitos.h | 3 +- 2 files changed, 103 insertions(+), 38 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 1265405..dddf4b3 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -866,9 +866,10 @@ badkey: * @dst_chained: whether dst is chained or not * @iv_dma: dma address of iv for checking continuity and link table * @dma_len: length of dma mapped link_tbl space - * @dma_link_tbl: bus physical address of link_tbl + * @dma_link_tbl: bus physical address of link_tbl/buf * @desc: h/w descriptor - * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) + * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) (SEC2) + * @buf: input and output buffeur (if {src,dst}_nents > 1) (SEC1) * * if decrypting (with authcheck), or either one of src_nents or dst_nents * is greater than 1, an integrity check value is concatenated to the end @@ -885,7 +886,10 @@ struct talitos_edesc { int dma_len; dma_addr_t dma_link_tbl; struct talitos_desc desc; - struct talitos_ptr link_tbl[0]; + union { + struct talitos_ptr link_tbl[0]; + u8 buf[0]; + }; }; static int talitos_map_sg(struct device *dev, struct scatterlist *sg, @@ -1282,8 +1286,11 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, dma_addr_t iv_dma = 0; gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + int max_len = is_sec1 ? TALITOS1_MAX_DATA_LEN : TALITOS2_MAX_DATA_LEN; - if (cryptlen + authsize > TALITOS_MAX_DATA_LEN) { + if (cryptlen + authsize > max_len) { dev_err(dev, "length exceeds h/w max limit\n"); return ERR_PTR(-EINVAL); } @@ -1327,8 +1334,12 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, */ alloc_len = sizeof(struct talitos_edesc); if (assoc_nents || src_nents || dst_nents) { - dma_len = (src_nents + dst_nents + 2 + assoc_nents) * - sizeof(struct talitos_ptr) + authsize; + if (is_sec1) + dma_len = src_nents ? cryptlen : 0 + + dst_nents ? cryptlen : 0; + else + dma_len = (src_nents + dst_nents + 2 + assoc_nents) * + sizeof(struct talitos_ptr) + authsize; alloc_len += dma_len; } else { dma_len = 0; @@ -1485,7 +1496,27 @@ static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, struct scatterlist *dst, unsigned int len, struct talitos_edesc *edesc) { - talitos_sg_unmap(dev, edesc, src, dst); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + if (is_sec1) { + if (!edesc->src_nents) { + dma_unmap_sg(dev, src, 1, +dst != src ? DMA_TO_DEVICE + : DMA_BIDIRECTIONAL); + } + if (dst && edesc->dst_nents) { + dma_sync_single_for_device(dev, + edesc->dma_link_tbl + len, + len, DMA_FROM_DEVICE); + sg_copy_from_buffer(dst, edesc->dst_nents ? : 1, + edesc->buf + len, len); + } else if (dst && dst != src) { + dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); + } + } else { + talitos_sg_unmap(dev, edesc, src, dst); + } } static void common_nonsnoop_unmap(struct device *dev, @@ -1528,25 +1559,42 @@ int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, bool is_sec1 = has_ftr_sec1(priv); to_talitos_ptr_len(ptr, len, is_sec1); - to_talitos_ptr_extent_clear(ptr, is_sec1); - sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir, - edesc->src_chained); + if (is_sec1) { +
[PATCH v3 15/17] crypto: talitos - SEC1 bugs on 0 data hash
SEC1 bugs on 0 data hash, so we submit an already padded block representing 0 data Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 24 1 file changed, 24 insertions(+) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index dddf4b3..f1406d7b 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1797,6 +1797,27 @@ static void ahash_done(struct device *dev, areq->base.complete(&areq->base, err); } +/* + * SEC1 doesn't like hashing of 0 sized message, so we do the padding + * ourself and submit a padded block + */ +void talitos_handle_buggy_hash(struct talitos_ctx *ctx, + struct talitos_edesc *edesc, + struct talitos_ptr *ptr) +{ + static u8 padded_hash[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + pr_err_once("Bug in SEC1, padding ourself\n"); + edesc->desc.hdr &= ~DESC_HDR_MODE0_MDEU_PAD; + map_single_talitos_ptr(ctx->dev, ptr, sizeof(padded_hash), + (char *)padded_hash, DMA_TO_DEVICE); +} + static int common_nonsnoop_hash(struct talitos_edesc *edesc, struct ahash_request *areq, unsigned int length, void (*callback) (struct device *dev, @@ -1857,6 +1878,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, /* last DWORD empty */ desc->ptr[6] = zero_entry; + if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0) + talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_hash_unmap(dev, edesc, areq); -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 16/17] crypto: talitos - Add fsl,sec1.0 compatible
We add a specific compatible for SEC1, to handle the differences between SEC1 and SEC2+ Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index f1406d7b..c04074d 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -3086,9 +3086,16 @@ err_out: } static const struct of_device_id talitos_match[] = { +#ifdef CONFIG_CRYPTO_DEV_TALITOS1 + { + .compatible = "fsl,sec1.0", + }, +#endif +#ifdef CONFIG_CRYPTO_DEV_TALITOS2 { .compatible = "fsl,sec2.0", }, +#endif {}, }; MODULE_DEVICE_TABLE(of, talitos_match); -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 17/17] crypto: talitos - Update DT bindings with SEC1
This patch updates the documentation by including SEC1 into SEC2/3 doc Signed-off-by: Christophe Leroy --- Documentation/devicetree/bindings/crypto/fsl-sec2.txt | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt index 38988ef..f0d926b 100644 --- a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt +++ b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt @@ -1,9 +1,11 @@ -Freescale SoC SEC Security Engines versions 2.x-3.x +Freescale SoC SEC Security Engines versions 1.x-2.x-3.x Required properties: - compatible : Should contain entries for this and backward compatible - SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0" + SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0" (SEC2/3) + e.g., "fsl,sec1.2", "fsl,sec1.0" (SEC1) +warning: SEC1 and SEC2 are mutually exclusive - reg : Offset and length of the register set for the device - interrupts : the SEC's interrupt number - fsl,num-channels : An integer representing the number of channels -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 00/17] crypto: talitos - Add support for SEC1
The purpose of this set of patchs is to add to talitos crypto driver the support for the SEC1 version of the security engine, which is found in mpc885 and mpc8272 processors. v3 is a complete rework of the patchset. Since a kernel can be built with support for both MPC82xx and MPC83xx at the same time, talitos driver shall support both SEC1 and SEC2+ at the same time. Based on cryptodev-2.6 tree Christophe Leroy (17): crypto: talitos - Use zero entry to init descriptors ptrs to zero crypto: talitos - Refactor the sg in/out chain allocation crypto: talitos - talitos_ptr renamed ptr for more lisibility crypto: talitos - Add a helper function to clear j_extent field crypto: talitos - remove param 'extent' in map_single_talitos_ptr() crypto: talitos - helper function for ptr len crypto: talitos - enhanced talitos_desc struct for SEC1 crypto: talitos - add sub-choice in talitos CONFIG for SEC1 crypto: talitos - Add a feature to tag SEC1 crypto: talitos - fill in talitos descriptor iaw SEC1 or SEC2+ crypto: talitos - adaptation of talitos_submit() for SEC1 crypto: talitos - base address for Execution Units crypto: talitos - adapt interrupts and reset functions to SEC1 crypto: talitos - implement scatter/gather copy for SEC1 crypto: talitos - SEC1 bugs on 0 data hash crypto: talitos - Add fsl,sec1.0 compatible crypto: talitos - Update DT bindings with SEC1 .../devicetree/bindings/crypto/fsl-sec2.txt| 6 +- drivers/crypto/Kconfig | 18 + drivers/crypto/talitos.c | 727 +++-- drivers/crypto/talitos.h | 153 +++-- 4 files changed, 644 insertions(+), 260 deletions(-) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 01/17] crypto: talitos - Use zero entry to init descriptors ptrs to zero
Do use zero_entry value to init the descriptors ptrs to zero instead of writing 0 in each field Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 857414a..7bf1b2b 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1373,9 +1373,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, int sg_count, ret; /* first DWORD empty */ - desc->ptr[0].len = 0; - to_talitos_ptr(&desc->ptr[0], 0); - desc->ptr[0].j_extent = 0; + desc->ptr[0] = zero_entry; /* cipher iv */ to_talitos_ptr(&desc->ptr[1], edesc->iv_dma); @@ -1445,9 +1443,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, DMA_FROM_DEVICE); /* last DWORD empty */ - desc->ptr[6].len = 0; - to_talitos_ptr(&desc->ptr[6], 0); - desc->ptr[6].j_extent = 0; + desc->ptr[6] = zero_entry; ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 02/17] crypto: talitos - Refactor the sg in/out chain allocation
This patch refactors the handling of the input and output data that is quite similar in several functions Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 159 --- 1 file changed, 81 insertions(+), 78 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 7bf1b2b..5a7e345 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1327,16 +1327,23 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, return 0; } +static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, +struct scatterlist *dst, unsigned int len, +struct talitos_edesc *edesc) +{ + talitos_sg_unmap(dev, edesc, src, dst); +} + static void common_nonsnoop_unmap(struct device *dev, struct talitos_edesc *edesc, struct ablkcipher_request *areq) { unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); + + unmap_sg_talitos_ptr(dev, areq->src, areq->dst, areq->nbytes, edesc); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); - if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); @@ -1358,6 +1365,65 @@ static void ablkcipher_done(struct device *dev, areq->base.complete(&areq->base, err); } +int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, + unsigned int len, struct talitos_edesc *edesc, + enum dma_data_direction dir, struct talitos_ptr *ptr) +{ + int sg_count; + + ptr->len = cpu_to_be16(len); + ptr->j_extent = 0; + + sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir, + edesc->src_chained); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(src)); + } else { + sg_count = sg_to_link_tbl(src, sg_count, len, + &edesc->link_tbl[0]); + if (sg_count > 1) { + to_talitos_ptr(ptr, edesc->dma_link_tbl); + ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); + } else { + /* Only one segment now, so no link tbl needed */ + to_talitos_ptr(ptr, sg_dma_address(src)); + } + } + return sg_count; +} + +void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, + unsigned int len, struct talitos_edesc *edesc, + enum dma_data_direction dir, + struct talitos_ptr *ptr, int sg_count) +{ + ptr->len = cpu_to_be16(len); + ptr->j_extent = 0; + + if (dir != DMA_NONE) + sg_count = talitos_map_sg(dev, dst, edesc->dst_nents ? : 1, + dir, edesc->dst_chained); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(dst)); + } else { + struct talitos_ptr *link_tbl_ptr = + &edesc->link_tbl[edesc->src_nents + 1]; + + to_talitos_ptr(ptr, edesc->dma_link_tbl + + (edesc->src_nents + 1) * + sizeof(struct talitos_ptr)); + ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; + sg_count = sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); + } +} + static int common_nonsnoop(struct talitos_edesc *edesc, struct ablkcipher_request *areq, void (*callback) (struct device *dev, @@ -1387,56 +1453,16 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* * cipher in */ - desc->ptr[3].len = cpu_to_be16(cryptlen); - desc->ptr[3].j_extent = 0; - - sg_count = talitos_map_sg(dev, areq->src, edesc->src_nents ? : 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE, - edesc->src_chained);
[PATCH v3 03/17] crypto: talitos - talitos_ptr renamed ptr for more lisibility
Linux CodyingStyle recommends to use short variables for local variables. ptr is just good enough for those 3 lines functions. It helps keep single lines shorter than 80 characters. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 5a7e345..fca0aed 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -55,37 +55,37 @@ #include "talitos.h" -static void to_talitos_ptr(struct talitos_ptr *talitos_ptr, dma_addr_t dma_addr) +static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr) { - talitos_ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr)); - talitos_ptr->eptr = upper_32_bits(dma_addr); + ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr)); + ptr->eptr = upper_32_bits(dma_addr); } /* * map virtual single (contiguous) pointer to h/w descriptor pointer */ static void map_single_talitos_ptr(struct device *dev, - struct talitos_ptr *talitos_ptr, + struct talitos_ptr *ptr, unsigned short len, void *data, unsigned char extent, enum dma_data_direction dir) { dma_addr_t dma_addr = dma_map_single(dev, data, len, dir); - talitos_ptr->len = cpu_to_be16(len); - to_talitos_ptr(talitos_ptr, dma_addr); - talitos_ptr->j_extent = extent; + ptr->len = cpu_to_be16(len); + to_talitos_ptr(ptr, dma_addr); + ptr->j_extent = extent; } /* * unmap bus single (contiguous) h/w descriptor pointer */ static void unmap_single_talitos_ptr(struct device *dev, -struct talitos_ptr *talitos_ptr, +struct talitos_ptr *ptr, enum dma_data_direction dir) { - dma_unmap_single(dev, be32_to_cpu(talitos_ptr->ptr), -be16_to_cpu(talitos_ptr->len), dir); + dma_unmap_single(dev, be32_to_cpu(ptr->ptr), +be16_to_cpu(ptr->len), dir); } static int reset_channel(struct device *dev, int ch) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 04/17] crypto: talitos - Add a helper function to clear j_extent field
j_extent field is specific to SEC2 so we add a helper function to clear it so that SEC1 can redefine that function as nop Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index fca0aed..c93f79b 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -61,6 +61,11 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr) ptr->eptr = upper_32_bits(dma_addr); } +static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr) +{ + ptr->j_extent = 0; +} + /* * map virtual single (contiguous) pointer to h/w descriptor pointer */ @@ -1372,7 +1377,7 @@ int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, int sg_count; ptr->len = cpu_to_be16(len); - ptr->j_extent = 0; + to_talitos_ptr_extent_clear(ptr); sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir, edesc->src_chained); @@ -1402,7 +1407,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, struct talitos_ptr *ptr, int sg_count) { ptr->len = cpu_to_be16(len); - ptr->j_extent = 0; + to_talitos_ptr_extent_clear(ptr); if (dir != DMA_NONE) sg_count = talitos_map_sg(dev, dst, edesc->dst_nents ? : 1, @@ -1444,7 +1449,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* cipher iv */ to_talitos_ptr(&desc->ptr[1], edesc->iv_dma); desc->ptr[1].len = cpu_to_be16(ivsize); - desc->ptr[1].j_extent = 0; + to_talitos_ptr_extent_clear(&desc->ptr[1]); /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 05/17] crypto: talitos - remove param 'extent' in map_single_talitos_ptr()
map_single_talitos_ptr() is always called with extent == 0, so lets remove this unused parameter Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 21 ++--- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index c93f79b..81e5636 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -72,14 +72,13 @@ static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr) static void map_single_talitos_ptr(struct device *dev, struct talitos_ptr *ptr, unsigned short len, void *data, - unsigned char extent, enum dma_data_direction dir) { dma_addr_t dma_addr = dma_map_single(dev, data, len, dir); ptr->len = cpu_to_be16(len); to_talitos_ptr(ptr, dma_addr); - ptr->j_extent = extent; + to_talitos_ptr_extent_clear(ptr); } /* @@ -958,7 +957,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, /* hmac key */ map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key, - 0, DMA_TO_DEVICE); + DMA_TO_DEVICE); /* hmac data */ desc->ptr[1].len = cpu_to_be16(areq->assoclen + ivsize); @@ -1002,7 +1001,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, - (char *)&ctx->key + ctx->authkeylen, 0, + (char *)&ctx->key + ctx->authkeylen, DMA_TO_DEVICE); /* @@ -1080,7 +1079,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, } /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, 0, + map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, DMA_FROM_DEVICE); ret = talitos_submit(dev, ctx->ch, desc, callback, areq); @@ -1453,7 +1452,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, - (char *)&ctx->key, 0, DMA_TO_DEVICE); + (char *)&ctx->key, DMA_TO_DEVICE); /* * cipher in @@ -1470,7 +1469,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, &desc->ptr[4], sg_count); /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, 0, + map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, DMA_FROM_DEVICE); /* last DWORD empty */ @@ -1595,7 +1594,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (!req_ctx->first || req_ctx->swinit) { map_single_talitos_ptr(dev, &desc->ptr[1], req_ctx->hw_context_size, - (char *)req_ctx->hw_context, 0, + (char *)req_ctx->hw_context, DMA_TO_DEVICE); req_ctx->swinit = 0; } else { @@ -1607,7 +1606,7 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, /* HMAC key */ if (ctx->keylen) map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, - (char *)&ctx->key, 0, DMA_TO_DEVICE); + (char *)&ctx->key, DMA_TO_DEVICE); else desc->ptr[2] = zero_entry; @@ -1624,11 +1623,11 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (req_ctx->last) map_single_talitos_ptr(dev, &desc->ptr[5], crypto_ahash_digestsize(tfm), - areq->result, 0, DMA_FROM_DEVICE); + areq->result, DMA_FROM_DEVICE); else map_single_talitos_ptr(dev, &desc->ptr[5], req_ctx->hw_context_size, - req_ctx->hw_context, 0, DMA_FROM_DEVICE); + req_ctx->hw_context, DMA_FROM_DEVICE); /* last DWORD empty */ desc->ptr[6] = zero_entry; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 06/17] crypto: talitos - helper function for ptr len
This patch adds a helper function for reads and writes of the len param of the talitos descriptor. This will help implement SEC1 later. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 24 +--- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 81e5636..bca6ded 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -61,6 +61,16 @@ static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr) ptr->eptr = upper_32_bits(dma_addr); } +static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned short len) +{ + ptr->len = cpu_to_be16(len); +} + +static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr) +{ + return be16_to_cpu(ptr->len); +} + static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr) { ptr->j_extent = 0; @@ -76,7 +86,7 @@ static void map_single_talitos_ptr(struct device *dev, { dma_addr_t dma_addr = dma_map_single(dev, data, len, dir); - ptr->len = cpu_to_be16(len); + to_talitos_ptr_len(ptr, len); to_talitos_ptr(ptr, dma_addr); to_talitos_ptr_extent_clear(ptr); } @@ -89,7 +99,7 @@ static void unmap_single_talitos_ptr(struct device *dev, enum dma_data_direction dir) { dma_unmap_single(dev, be32_to_cpu(ptr->ptr), -be16_to_cpu(ptr->len), dir); +from_talitos_ptr_len(ptr), dir); } static int reset_channel(struct device *dev, int ch) @@ -1375,7 +1385,7 @@ int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, { int sg_count; - ptr->len = cpu_to_be16(len); + to_talitos_ptr_len(ptr, len); to_talitos_ptr_extent_clear(ptr); sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir, @@ -1405,7 +1415,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, enum dma_data_direction dir, struct talitos_ptr *ptr, int sg_count) { - ptr->len = cpu_to_be16(len); + to_talitos_ptr_len(ptr, len); to_talitos_ptr_extent_clear(ptr); if (dir != DMA_NONE) @@ -1447,7 +1457,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* cipher iv */ to_talitos_ptr(&desc->ptr[1], edesc->iv_dma); - desc->ptr[1].len = cpu_to_be16(ivsize); + to_talitos_ptr_len(&desc->ptr[1], ivsize); to_talitos_ptr_extent_clear(&desc->ptr[1]); /* cipher key */ @@ -1539,11 +1549,11 @@ static void common_nonsnoop_hash_unmap(struct device *dev, unmap_sg_talitos_ptr(dev, req_ctx->psrc, NULL, 0, edesc); /* When using hashctx-in, must unmap it. */ - if (edesc->desc.ptr[1].len) + if (from_talitos_ptr_len(&edesc->desc.ptr[1])) unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); - if (edesc->desc.ptr[2].len) + if (from_talitos_ptr_len(&edesc->desc.ptr[2])) unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 07/17] crypto: talitos - enhanced talitos_desc struct for SEC1
This patch enhances the talitos_desc struct with fields for SEC1. SEC1 has only one header field, and has a 'next_desc' field in addition. This mixed descriptor will continue to fit SEC2, and for SEC1 we will recopy hdr value into hdr1 value in talitos_submit() Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.h | 20 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 61a1405..f078da1 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -37,9 +37,17 @@ /* descriptor pointer entry */ struct talitos_ptr { - __be16 len; /* length */ - u8 j_extent;/* jump to sg link table and/or extent */ - u8 eptr;/* extended address */ + union { + struct {/* SEC2 format */ + __be16 len; /* length */ + u8 j_extent;/* jump to sg link table and/or extent*/ + u8 eptr;/* extended address */ + }; + struct {/* SEC1 format */ + __be16 res; + __be16 len1;/* length */ + }; + }; __be32 ptr; /* address */ }; @@ -53,8 +61,12 @@ static const struct talitos_ptr zero_entry = { /* descriptor */ struct talitos_desc { __be32 hdr; /* header high bits */ - __be32 hdr_lo; /* header low bits */ + union { + __be32 hdr_lo; /* header low bits */ + __be32 hdr1;/* header for SEC1 */ + }; struct talitos_ptr ptr[7]; /* ptr/len pair array */ + __be32 next_desc; /* next descriptor (SEC1) */ }; /** -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 08/17] crypto: talitos - add sub-choice in talitos CONFIG for SEC1
This patch adds a CONFIG option to select SEC1, SEC2+ or both. Signed-off-by: Christophe Leroy --- drivers/crypto/Kconfig | 18 ++ 1 file changed, 18 insertions(+) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 800bf41..8a76a01 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -222,6 +222,24 @@ config CRYPTO_DEV_TALITOS To compile this driver as a module, choose M here: the module will be called talitos. +config CRYPTO_DEV_TALITOS1 + bool "SEC1 (SEC 1.0 and SEC Lite 1.2)" + depends on CRYPTO_DEV_TALITOS + depends on PPC_8xx || PPC_82xx + default y + help + Say 'Y' here to use the Freescale Security Engine (SEC) version 1.0 + found on MPC82xx or the Freescale Security Engine (SEC Lite) + version 1.2 found on MPC8xx + +config CRYPTO_DEV_TALITOS2 + bool "SEC2+ (SEC version 2.0 or upper)" + depends on CRYPTO_DEV_TALITOS + default y if !PPC_8xx + help + Say 'Y' here to use the Freescale Security Engine (SEC) + version 2 and following as found on MPC83xx, MPC85xx, etc ... + config CRYPTO_DEV_IXP4XX tristate "Driver for IXP4xx crypto hardware acceleration" depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 09/17] crypto: talitos - Add a feature to tag SEC1
We add a new feature in the features field, to mark compatible "fsl,sec1.0" We also define a helper function called has_ftr_sec1() to help functions quickly determine if they are running on SEC1 or SEC2+. When only SEC1 or SEC2 is compiled in, has_ftr_sec1() return trivial corresponding value. If both are compiled in, feature field is checked. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 3 +++ drivers/crypto/talitos.h | 17 + 2 files changed, 20 insertions(+) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index bca6ded..db95023 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -2709,6 +2709,9 @@ static int talitos_probe(struct platform_device *ofdev) TALITOS_FTR_SHA224_HWINIT | TALITOS_FTR_HMAC_OK; + if (of_device_is_compatible(np, "fsl,sec1.0")) + priv->features |= TALITOS_FTR_SEC1; + priv->chan = kzalloc(sizeof(struct talitos_channel) * priv->num_channels, GFP_KERNEL); if (!priv->chan) { diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index f078da1..b0bdb4e 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -156,6 +156,23 @@ extern int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, #define TALITOS_FTR_HW_AUTH_CHECK 0x0002 #define TALITOS_FTR_SHA224_HWINIT 0x0004 #define TALITOS_FTR_HMAC_OK 0x0008 +#define TALITOS_FTR_SEC1 0x0010 + +/* + * If both CONFIG_CRYPTO_DEV_TALITOS1 and CONFIG_CRYPTO_DEV_TALITOS2 are + * defined, we check the features which are set according to the device tree. + * Otherwise, we answer true or false directly + */ +static inline bool has_ftr_sec1(struct talitos_private *priv) +{ +#if defined(CONFIG_CRYPTO_DEV_TALITOS1) && defined(CONFIG_CRYPTO_DEV_TALITOS2) + return priv->features & TALITOS_FTR_SEC1 ? true : false; +#elif defined(CONFIG_CRYPTO_DEV_TALITOS1) + return true; +#else + return false; +#endif +} /* * TALITOS_xxx_LO addresses point to the low data bits (32-63) of the register -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 10/17] crypto: talitos - fill in talitos descriptor iaw SEC1 or SEC2+
talitos descriptor is slightly different for SEC1 and SEC2+, so lets the helper function that fills the descriptor take into account the type of SEC. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 105 ++- 1 file changed, 67 insertions(+), 38 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index db95023..678b528 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -55,25 +55,38 @@ #include "talitos.h" -static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr) +static void to_talitos_ptr(struct talitos_ptr *ptr, dma_addr_t dma_addr, + bool is_sec1) { ptr->ptr = cpu_to_be32(lower_32_bits(dma_addr)); - ptr->eptr = upper_32_bits(dma_addr); + if (!is_sec1) + ptr->eptr = upper_32_bits(dma_addr); } -static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned short len) +static void to_talitos_ptr_len(struct talitos_ptr *ptr, unsigned short len, + bool is_sec1) { - ptr->len = cpu_to_be16(len); + if (is_sec1) { + ptr->res = 0; + ptr->len1 = cpu_to_be16(len); + } else { + ptr->len = cpu_to_be16(len); + } } -static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr) +static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr, + bool is_sec1) { - return be16_to_cpu(ptr->len); + if (is_sec1) + return be16_to_cpu(ptr->len1); + else + return be16_to_cpu(ptr->len); } -static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr) +static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr, bool is_sec1) { - ptr->j_extent = 0; + if (!is_sec1) + ptr->j_extent = 0; } /* @@ -85,10 +98,12 @@ static void map_single_talitos_ptr(struct device *dev, enum dma_data_direction dir) { dma_addr_t dma_addr = dma_map_single(dev, data, len, dir); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); - to_talitos_ptr_len(ptr, len); - to_talitos_ptr(ptr, dma_addr); - to_talitos_ptr_extent_clear(ptr); + to_talitos_ptr_len(ptr, len, is_sec1); + to_talitos_ptr(ptr, dma_addr, is_sec1); + to_talitos_ptr_extent_clear(ptr, is_sec1); } /* @@ -98,8 +113,11 @@ static void unmap_single_talitos_ptr(struct device *dev, struct talitos_ptr *ptr, enum dma_data_direction dir) { + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + dma_unmap_single(dev, be32_to_cpu(ptr->ptr), -from_talitos_ptr_len(ptr), dir); +from_talitos_ptr_len(ptr, is_sec1), dir); } static int reset_channel(struct device *dev, int ch) @@ -922,7 +940,7 @@ static int sg_to_link_tbl(struct scatterlist *sg, int sg_count, int n_sg = sg_count; while (n_sg--) { - to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg)); + to_talitos_ptr(link_tbl_ptr, sg_dma_address(sg), 0); link_tbl_ptr->len = cpu_to_be16(sg_dma_len(sg)); link_tbl_ptr->j_extent = 0; link_tbl_ptr++; @@ -976,7 +994,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * - sizeof(struct talitos_ptr)); + sizeof(struct talitos_ptr), 0); desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP; /* assoc_nents - 1 entries for assoc, 1 for IV */ @@ -987,7 +1005,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, tbl_ptr += sg_count - 1; tbl_ptr->j_extent = 0; tbl_ptr++; - to_talitos_ptr(tbl_ptr, edesc->iv_dma); + to_talitos_ptr(tbl_ptr, edesc->iv_dma, 0); tbl_ptr->len = cpu_to_be16(ivsize); tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; @@ -996,14 +1014,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, } else { if (areq->assoclen) to_talitos_ptr(&desc->ptr[1], - sg_dma_address(areq->assoc)); + sg_dma_address(areq->assoc), 0); else - to_talitos_ptr(&desc->ptr[1], edesc->iv_dma)
[PATCH v3 11/17] crypto: talitos - adaptation of talitos_submit() for SEC1
SEC1 descriptor is a bit different to SEC2+ descriptor. talitos_submit() will have to copy hdr field into hdr1 field and send the descriptor starting at hdr1 up to next_desc. For SEC2, it remains unchanged and next_desc is just ignored. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 23 +++ drivers/crypto/talitos.h | 2 ++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 678b528..e6ea651 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -236,6 +236,7 @@ int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, struct talitos_request *request; unsigned long flags; int head; + bool is_sec1 = has_ftr_sec1(priv); spin_lock_irqsave(&priv->chan[ch].head_lock, flags); @@ -249,8 +250,17 @@ int talitos_submit(struct device *dev, int ch, struct talitos_desc *desc, request = &priv->chan[ch].fifo[head]; /* map descriptor and save caller data */ - request->dma_desc = dma_map_single(dev, desc, sizeof(*desc), - DMA_BIDIRECTIONAL); + if (is_sec1) { + desc->hdr1 = desc->hdr; + desc->next_desc = 0; + request->dma_desc = dma_map_single(dev, &desc->hdr1, + TALITOS_DESC_SIZE, + DMA_BIDIRECTIONAL); + } else { + request->dma_desc = dma_map_single(dev, desc, + TALITOS_DESC_SIZE, + DMA_BIDIRECTIONAL); + } request->callback = callback; request->context = context; @@ -282,16 +292,21 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) struct talitos_request *request, saved_req; unsigned long flags; int tail, status; + bool is_sec1 = has_ftr_sec1(priv); spin_lock_irqsave(&priv->chan[ch].tail_lock, flags); tail = priv->chan[ch].tail; while (priv->chan[ch].fifo[tail].desc) { + __be32 hdr; + request = &priv->chan[ch].fifo[tail]; /* descriptors with their done bits set don't get the error */ rmb(); - if ((request->desc->hdr & DESC_HDR_DONE) == DESC_HDR_DONE) + hdr = is_sec1 ? request->desc->hdr1 : request->desc->hdr; + + if ((hdr & DESC_HDR_DONE) == DESC_HDR_DONE) status = 0; else if (!error) @@ -300,7 +315,7 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) status = error; dma_unmap_single(dev, request->dma_desc, -sizeof(struct talitos_desc), +TALITOS_DESC_SIZE, DMA_BIDIRECTIONAL); /* copy entries so we can call callback outside lock */ diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index b0bdb4e..f827c04 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -69,6 +69,8 @@ struct talitos_desc { __be32 next_desc; /* next descriptor (SEC1) */ }; +#define TALITOS_DESC_SIZE (sizeof(struct talitos_desc) - sizeof(__be32)) + /** * talitos_request - descriptor submission request * @desc: descriptor pointer (kernel virtual) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 12/17] crypto: talitos - base address for Execution Units
SEC 1.0, 1.2 and 2.x+ have different EU base addresses, so we need to define pointers for each EU in the driver private data structure. The proper address is set by the probe function depending on the SEC type, in order to provide access to the proper address. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 83 drivers/crypto/talitos.h | 72 + 2 files changed, 100 insertions(+), 55 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index e6ea651..6d77699 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -208,7 +208,7 @@ static int init_device(struct device *dev) /* disable integrity check error interrupts (use writeback instead) */ if (priv->features & TALITOS_FTR_HW_AUTH_CHECK) - setbits32(priv->reg + TALITOS_MDEUICR_LO, + setbits32(priv->reg_mdeu + TALITOS_EUICR_LO, TALITOS_MDEUICR_LO_ICE); return 0; @@ -424,44 +424,44 @@ static void report_eu_error(struct device *dev, int ch, u32 desc_hdr) switch (desc_hdr & DESC_HDR_SEL0_MASK) { case DESC_HDR_SEL0_AFEU: dev_err(dev, "AFEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_AFEUISR), - in_be32(priv->reg + TALITOS_AFEUISR_LO)); + in_be32(priv->reg_afeu + TALITOS_EUISR), + in_be32(priv->reg_afeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_DEU: dev_err(dev, "DEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_DEUISR), - in_be32(priv->reg + TALITOS_DEUISR_LO)); + in_be32(priv->reg_deu + TALITOS_EUISR), + in_be32(priv->reg_deu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_MDEUA: case DESC_HDR_SEL0_MDEUB: dev_err(dev, "MDEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_MDEUISR), - in_be32(priv->reg + TALITOS_MDEUISR_LO)); + in_be32(priv->reg_mdeu + TALITOS_EUISR), + in_be32(priv->reg_mdeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_RNG: dev_err(dev, "RNGUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_RNGUISR), - in_be32(priv->reg + TALITOS_RNGUISR_LO)); + in_be32(priv->reg_rngu + TALITOS_ISR), + in_be32(priv->reg_rngu + TALITOS_ISR_LO)); break; case DESC_HDR_SEL0_PKEU: dev_err(dev, "PKEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_PKEUISR), - in_be32(priv->reg + TALITOS_PKEUISR_LO)); + in_be32(priv->reg_pkeu + TALITOS_EUISR), + in_be32(priv->reg_pkeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_AESU: dev_err(dev, "AESUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_AESUISR), - in_be32(priv->reg + TALITOS_AESUISR_LO)); + in_be32(priv->reg_aesu + TALITOS_EUISR), + in_be32(priv->reg_aesu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_CRCU: dev_err(dev, "CRCUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_CRCUISR), - in_be32(priv->reg + TALITOS_CRCUISR_LO)); + in_be32(priv->reg_crcu + TALITOS_EUISR), + in_be32(priv->reg_crcu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL0_KEU: dev_err(dev, "KEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_KEUISR), - in_be32(priv->reg + TALITOS_KEUISR_LO)); + in_be32(priv->reg_pkeu + TALITOS_EUISR), + in_be32(priv->reg_pkeu + TALITOS_EUISR_LO)); break; } @@ -469,13 +469,13 @@ static void report_eu_error(struct device *dev, int ch, u32 desc_hdr) case DESC_HDR_SEL1_MDEUA: case DESC_HDR_SEL1_MDEUB: dev_err(dev, "MDEUISR 0x%08x_%08x\n", - in_be32(priv->reg + TALITOS_MDEUISR), - in_be32(priv->reg + TALITOS_MDEUISR_LO)); + in_be32(priv->reg_mdeu + TALITOS_EUISR), + in_be32(priv->reg_mdeu + TALITOS_EUISR_LO)); break; case DESC_HDR_SEL1_CRCU: dev_err(dev,
[PATCH v3 13/17] crypto: talitos - adapt interrupts and reset functions to SEC1
This patch adapts the interrupts handling and reset function for SEC1. On SEC1, registers are almost similar to SEC2+, but bits are sometimes located at different places. So we need to define TALITOS1 and TALITOS2 versions of some fields, and manage according to whether it is SEC1 or SEC2. On SEC1, only one interrupt vector is dedicated to the SEC, so only interrupt_4ch is needed. On SEC1, interrupts are enabled by clearing related bits in IMR, while on SEC2, interrupts are enabled by seting the bits in IMR. SEC1 also performs parity verification in the DES Unit. We have to disable this feature because the test vectors provided in the kernel have parity errors. In reset functions, only SEC2 supports continuation after error. For SEC1, we have to reset in all cases. For errors handling, SEC2+ names have been kept, but displayed text have been amended to reflect exact meaning on SEC1. Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 227 +++ drivers/crypto/talitos.h | 39 +--- 2 files changed, 199 insertions(+), 67 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 6d77699..1265405 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -124,12 +124,23 @@ static int reset_channel(struct device *dev, int ch) { struct talitos_private *priv = dev_get_drvdata(dev); unsigned int timeout = TALITOS_TIMEOUT; + bool is_sec1 = has_ftr_sec1(priv); - setbits32(priv->chan[ch].reg + TALITOS_CCCR, TALITOS_CCCR_RESET); + if (is_sec1) { + setbits32(priv->chan[ch].reg + TALITOS_CCCR_LO, + TALITOS1_CCCR_LO_RESET); - while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & TALITOS_CCCR_RESET) - && --timeout) - cpu_relax(); + while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR_LO) & + TALITOS1_CCCR_LO_RESET) && --timeout) + cpu_relax(); + } else { + setbits32(priv->chan[ch].reg + TALITOS_CCCR, + TALITOS2_CCCR_RESET); + + while ((in_be32(priv->chan[ch].reg + TALITOS_CCCR) & + TALITOS2_CCCR_RESET) && --timeout) + cpu_relax(); + } if (timeout == 0) { dev_err(dev, "failed to reset channel %d\n", ch); @@ -152,11 +163,12 @@ static int reset_device(struct device *dev) { struct talitos_private *priv = dev_get_drvdata(dev); unsigned int timeout = TALITOS_TIMEOUT; - u32 mcr = TALITOS_MCR_SWR; + bool is_sec1 = has_ftr_sec1(priv); + u32 mcr = is_sec1 ? TALITOS1_MCR_SWR : TALITOS2_MCR_SWR; setbits32(priv->reg + TALITOS_MCR, mcr); - while ((in_be32(priv->reg + TALITOS_MCR) & TALITOS_MCR_SWR) + while ((in_be32(priv->reg + TALITOS_MCR) & mcr) && --timeout) cpu_relax(); @@ -180,6 +192,7 @@ static int init_device(struct device *dev) { struct talitos_private *priv = dev_get_drvdata(dev); int ch, err; + bool is_sec1 = has_ftr_sec1(priv); /* * Master reset @@ -203,8 +216,15 @@ static int init_device(struct device *dev) } /* enable channel done and error interrupts */ - setbits32(priv->reg + TALITOS_IMR, TALITOS_IMR_INIT); - setbits32(priv->reg + TALITOS_IMR_LO, TALITOS_IMR_LO_INIT); + if (is_sec1) { + clrbits32(priv->reg + TALITOS_IMR, TALITOS1_IMR_INIT); + clrbits32(priv->reg + TALITOS_IMR_LO, TALITOS1_IMR_LO_INIT); + /* disable parity error check in DEU (erroneous? test vect.) */ + setbits32(priv->reg_deu + TALITOS_EUICR, TALITOS1_DEUICR_KPE); + } else { + setbits32(priv->reg + TALITOS_IMR, TALITOS2_IMR_INIT); + setbits32(priv->reg + TALITOS_IMR_LO, TALITOS2_IMR_LO_INIT); + } /* disable integrity check error interrupts (use writeback instead) */ if (priv->features & TALITOS_FTR_HW_AUTH_CHECK) @@ -349,8 +369,37 @@ static void flush_channel(struct device *dev, int ch, int error, int reset_ch) /* * process completed requests for channels that have done status */ -#define DEF_TALITOS_DONE(name, ch_done_mask) \ -static void talitos_done_##name(unsigned long data)\ +#define DEF_TALITOS1_DONE(name, ch_done_mask) \ +static void talitos1_done_##name(unsigned long data) \ +{ \ + struct device *dev = (struct device *)data; \ + struct talitos_private *priv = dev_get_drvdata(dev);\ + unsigned long flags;
[PATCH v3 14/17] crypto: talitos - implement scatter/gather copy for SEC1
SEC1 doesn't support scatter/gather, SEC1 doesn't handle link tables. Therefore, for SEC1 we have to do it by SW. For that, we reserve space at the end of the extended descriptor, in lieu of the space reserved for the link tables on SEC2, and we perform sg_copy() when preparing the descriptors We also adapt the max buffer size which is only 32k on SEC1 while it is 64k on SEC2+ Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 138 ++- drivers/crypto/talitos.h | 3 +- 2 files changed, 103 insertions(+), 38 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 1265405..dddf4b3 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -866,9 +866,10 @@ badkey: * @dst_chained: whether dst is chained or not * @iv_dma: dma address of iv for checking continuity and link table * @dma_len: length of dma mapped link_tbl space - * @dma_link_tbl: bus physical address of link_tbl + * @dma_link_tbl: bus physical address of link_tbl/buf * @desc: h/w descriptor - * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) + * @link_tbl: input and output h/w link tables (if {src,dst}_nents > 1) (SEC2) + * @buf: input and output buffeur (if {src,dst}_nents > 1) (SEC1) * * if decrypting (with authcheck), or either one of src_nents or dst_nents * is greater than 1, an integrity check value is concatenated to the end @@ -885,7 +886,10 @@ struct talitos_edesc { int dma_len; dma_addr_t dma_link_tbl; struct talitos_desc desc; - struct talitos_ptr link_tbl[0]; + union { + struct talitos_ptr link_tbl[0]; + u8 buf[0]; + }; }; static int talitos_map_sg(struct device *dev, struct scatterlist *sg, @@ -1282,8 +1286,11 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, dma_addr_t iv_dma = 0; gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + int max_len = is_sec1 ? TALITOS1_MAX_DATA_LEN : TALITOS2_MAX_DATA_LEN; - if (cryptlen + authsize > TALITOS_MAX_DATA_LEN) { + if (cryptlen + authsize > max_len) { dev_err(dev, "length exceeds h/w max limit\n"); return ERR_PTR(-EINVAL); } @@ -1327,8 +1334,12 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, */ alloc_len = sizeof(struct talitos_edesc); if (assoc_nents || src_nents || dst_nents) { - dma_len = (src_nents + dst_nents + 2 + assoc_nents) * - sizeof(struct talitos_ptr) + authsize; + if (is_sec1) + dma_len = src_nents ? cryptlen : 0 + + dst_nents ? cryptlen : 0; + else + dma_len = (src_nents + dst_nents + 2 + assoc_nents) * + sizeof(struct talitos_ptr) + authsize; alloc_len += dma_len; } else { dma_len = 0; @@ -1485,7 +1496,27 @@ static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, struct scatterlist *dst, unsigned int len, struct talitos_edesc *edesc) { - talitos_sg_unmap(dev, edesc, src, dst); + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + if (is_sec1) { + if (!edesc->src_nents) { + dma_unmap_sg(dev, src, 1, +dst != src ? DMA_TO_DEVICE + : DMA_BIDIRECTIONAL); + } + if (dst && edesc->dst_nents) { + dma_sync_single_for_device(dev, + edesc->dma_link_tbl + len, + len, DMA_FROM_DEVICE); + sg_copy_from_buffer(dst, edesc->dst_nents ? : 1, + edesc->buf + len, len); + } else if (dst && dst != src) { + dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); + } + } else { + talitos_sg_unmap(dev, edesc, src, dst); + } } static void common_nonsnoop_unmap(struct device *dev, @@ -1528,25 +1559,42 @@ int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, bool is_sec1 = has_ftr_sec1(priv); to_talitos_ptr_len(ptr, len, is_sec1); - to_talitos_ptr_extent_clear(ptr, is_sec1); - sg_count = talitos_map_sg(dev, src, edesc->src_nents ? : 1, dir, - edesc->src_chained); + if (is_sec1) { +
[PATCH v3 15/17] crypto: talitos - SEC1 bugs on 0 data hash
SEC1 bugs on 0 data hash, so we submit an already padded block representing 0 data Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 24 1 file changed, 24 insertions(+) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index dddf4b3..f1406d7b 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1797,6 +1797,27 @@ static void ahash_done(struct device *dev, areq->base.complete(&areq->base, err); } +/* + * SEC1 doesn't like hashing of 0 sized message, so we do the padding + * ourself and submit a padded block + */ +void talitos_handle_buggy_hash(struct talitos_ctx *ctx, + struct talitos_edesc *edesc, + struct talitos_ptr *ptr) +{ + static u8 padded_hash[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }; + + pr_err_once("Bug in SEC1, padding ourself\n"); + edesc->desc.hdr &= ~DESC_HDR_MODE0_MDEU_PAD; + map_single_talitos_ptr(ctx->dev, ptr, sizeof(padded_hash), + (char *)padded_hash, DMA_TO_DEVICE); +} + static int common_nonsnoop_hash(struct talitos_edesc *edesc, struct ahash_request *areq, unsigned int length, void (*callback) (struct device *dev, @@ -1857,6 +1878,9 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, /* last DWORD empty */ desc->ptr[6] = zero_entry; + if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0) + talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_hash_unmap(dev, edesc, areq); -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 16/17] crypto: talitos - Add fsl,sec1.0 compatible
We add a specific compatible for SEC1, to handle the differences between SEC1 and SEC2+ Signed-off-by: Christophe Leroy --- drivers/crypto/talitos.c | 7 +++ 1 file changed, 7 insertions(+) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index f1406d7b..c04074d 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -3086,9 +3086,16 @@ err_out: } static const struct of_device_id talitos_match[] = { +#ifdef CONFIG_CRYPTO_DEV_TALITOS1 + { + .compatible = "fsl,sec1.0", + }, +#endif +#ifdef CONFIG_CRYPTO_DEV_TALITOS2 { .compatible = "fsl,sec2.0", }, +#endif {}, }; MODULE_DEVICE_TABLE(of, talitos_match); -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 17/17] crypto: talitos - Update DT bindings with SEC1
This patch updates the documentation by including SEC1 into SEC2/3 doc Signed-off-by: Christophe Leroy --- Documentation/devicetree/bindings/crypto/fsl-sec2.txt | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt index 38988ef..f0d926b 100644 --- a/Documentation/devicetree/bindings/crypto/fsl-sec2.txt +++ b/Documentation/devicetree/bindings/crypto/fsl-sec2.txt @@ -1,9 +1,11 @@ -Freescale SoC SEC Security Engines versions 2.x-3.x +Freescale SoC SEC Security Engines versions 1.x-2.x-3.x Required properties: - compatible : Should contain entries for this and backward compatible - SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0" + SEC versions, high to low, e.g., "fsl,sec2.1", "fsl,sec2.0" (SEC2/3) + e.g., "fsl,sec1.2", "fsl,sec1.0" (SEC1) +warning: SEC1 and SEC2 are mutually exclusive - reg : Offset and length of the register set for the device - interrupts : the SEC's interrupt number - fsl,num-channels : An integer representing the number of channels -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/8xx: mmu_virtual_psize incorrect for 16k pages
mmu_virtual_psize shall be set to MMU_PAGE_16K when 16k pages have been selected Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/mmu-8xx.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 986b9e1..d41200c 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -145,7 +145,14 @@ typedef struct { } mm_context_t; #endif /* !__ASSEMBLY__ */ +#if (PAGE_SHIFT == 12) #define mmu_virtual_psize MMU_PAGE_4K +#elif (PAGE_SHIFT == 14) +#define mmu_virtual_psize MMU_PAGE_16K +#else +#error "Unsupported PAGE_SIZE" +#endif + #define mmu_linear_psize MMU_PAGE_8M #endif /* _ASM_POWERPC_MMU_8XX_H_ */ -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 00/11] powerpc8xx: Further optimisation of TLB handling
This patchset provides a further optimisation of TLB handling in the 8xx. Main changes are based on: - Using processor handling of PGD/PTE Validity bits instead of testing ourselves the entries validity - Aligning PGD address to allow direct bit manipulation - Not saving registers like CR when not needed It also adds support to any TASK_SIZE Patchset: 01 - powerpc/8xx: remove remaining unnecessary code in FixupDAR 02 - powerpc/8xx: remove tests on PGDIR entry validity 03 - powerpc32: Use kmem_cache memory for PGDIR 04 - powerpc/8xx: Take benefit of aligned PGDIR 05 - powerpc/8xx: Optimise access to swapper_pg_dir 06 - powerpc/8xx: Remove duplicated code in set_context() 07 - powerpc/8xx: macro for handling CPU15 errata 08 - powerpc/8xx: Handle CR out of exception PROLOG/EPILOG 09 - powerpc/8xx: dont save CR in SCRATCH registers 10 - powerpc/8xx: Use SPRG2 instead of DAR for saving r3 11 - powerpc/8xx: Add support for TASK_SIZE greater than 0x8000 All changes have been successfully tested on MPC885 Signed-off-by: Christophe Leroy Tested-by: Christophe Leroy --- arch/powerpc/include/asm/page.h | 8 +++ arch/powerpc/include/asm/pgtable-ppc32.h | 37 +--- arch/powerpc/include/asm/pgtable.h | 17 --- arch/powerpc/include/asm/pte-8xx.h | 1 + arch/powerpc/include/asm/pte-common.h| 3 +++ arch/powerpc/kernel/head_8xx.S | 3 --- arch/powerpc/mm/pgtable_32.c | 14 7 files changed, 56 insertions(+), 27 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 01/11] powerpc/8xx: remove remaining unnecessary code in FixupDAR
Since commit 33fb845a6f01 ("powerpc/8xx: Don't use MD_TWC for walk"), MD_EPN and MD_TWC are not writen anymore in FixupDAR so saving r3 has become useless. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 6 -- 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 3d4b8ee..79b8a23 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -532,9 +532,6 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* define if you don't want to use self modifying code */ #define NO_SELF_MODIFYING_CODE FixupDAR:/* Entry point for dcbx workaround. */ -#ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 -#endif mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 @@ -551,9 +548,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ /* Extract level 2 index */ rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzxr11, r10, r11 /* Get the pte */ -#ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR -#endif /* concat physical page address(r11) and page offset(r10) */ mfspr r10, SPRN_SRR0 rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 02/11] powerpc/8xx: remove tests on PGDIR entry validity
Kernel MMU handling code handles validity of entries via _PMD_PRESENT which corresponds to V bit in MD_TWC and MI_TWC. When the V bit is not set, MPC8xx triggers TLBError exception. So we don't have to check that and branch ourself to TLBError. We can set TLB entries with non present entries, remove all those tests and let the 8xx handle it. This reduce the number of cycle when the entries are valid which is the case most of the time, and doesn't significantly increase the time for handling invalid entries. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 41 - 1 file changed, 8 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 79b8a23..2c329f1 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -329,12 +329,9 @@ InstructionTLBMiss: /* Extract level 1 index */ rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzxr11, r10, r11 /* Get the level 1 entry */ - rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ - beq 2f /* If zero, don't try to find a pte */ + rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ - /* We have a pte table, so load the MI_TWC with the attributes -* for this "segment." -*/ + /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ /* Extract level 2 index */ @@ -342,13 +339,11 @@ InstructionTLBMiss: lwzxr10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP - andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT - cmpwi cr0, r11, _PAGE_ACCESSED | _PAGE_PRESENT - li r11, RPN_PATTERN - bne-cr0, 2f -#else - li r11, RPN_PATTERN + rlwinm r11, r10, 32-5, _PAGE_PRESENT + and r11, r11, r10 + rlwimi r10, r11, 0, _PAGE_PRESENT #endif + li r11, RPN_PATTERN /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 21 and 28 must be clear. * Software indicator bits 24, 25, 26, and 27 must be @@ -366,21 +361,6 @@ InstructionTLBMiss: mfspr r10, SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 rfi -2: - mfspr r10, SPRN_SRR1 - /* clear all error bits as TLB Miss -* sets a few unconditionally - */ - rlwinm r10, r10, 0, 0x - mtspr SPRN_SRR1, r10 - - /* Restore registers */ -#ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ -#endif - mfspr r10, SPRN_SPRG_SCRATCH2 - b InstructionTLBError1 . = 0x1200 DataStoreTLBMiss: @@ -403,8 +383,6 @@ DataStoreTLBMiss: /* Extract level 1 index */ rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzxr11, r10, r11 /* Get the level 1 entry */ - rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ - beq 2f /* If zero, don't try to find a pte */ /* We have a pte table, so load fetch the pte from the table. */ @@ -450,7 +428,7 @@ DataStoreTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ -2: li r11, RPN_PATTERN + li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ MTSPR_CPU6(SPRN_MD_RPN, r10, r3)/* Update TLB entry */ @@ -469,10 +447,7 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: - EXCEPTION_PROLOG_0 -InstructionTLBError1: - EXCEPTION_PROLOG_1 - EXCEPTION_PROLOG_2 + EXCEPTION_PROLOG mr r4,r12 mr r5,r9 andis. r10,r5,0x4000 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 03/11] powerpc32: Use kmem_cache memory for PGDIR
When pages are not 4K, PGDIR table is allocated with kmalloc(). In order to optimise TLB handlers, aligned memory is needed. kmalloc() doesn't provide aligned memory blocks, so lets use a kmem_cache pool instead. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/pgtable-ppc32.h | 4 arch/powerpc/mm/pgtable_32.c | 16 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 543bb8e..d323e8b 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -346,10 +346,14 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry) #define pte_to_pgoff(pte) (pte_val(pte) >> 3) #define pgoff_to_pte(off) ((pte_t) { ((off) << 3) | _PAGE_FILE }) +#ifndef CONFIG_PPC_4K_PAGES +void pgtable_cache_init(void); +#else /* * No page table caches to initialise */ #define pgtable_cache_init() do { } while (0) +#endif extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp); diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index cf11342..730dc2d 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -73,13 +73,25 @@ extern unsigned long p_mapped_by_tlbcam(phys_addr_t pa); #define PGDIR_ORDER(32 + PGD_T_LOG2 - PGDIR_SHIFT) +#ifndef CONFIG_PPC_4K_PAGES +static struct kmem_cache *pgtable_cache; + +void pgtable_cache_init(void) +{ + pgtable_cache = kmem_cache_create("PGDIR cache", 1 << PGDIR_ORDER, + 1 << PGDIR_ORDER, 0, NULL); + if (pgtable_cache == NULL) + panic("Couldn't allocate pgtable caches"); +} +#endif + pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *ret; /* pgdir take page or two with 4K pages and a page fraction otherwise */ #ifndef CONFIG_PPC_4K_PAGES - ret = kzalloc(1 << PGDIR_ORDER, GFP_KERNEL); + ret = kmem_cache_alloc(pgtable_cache, GFP_KERNEL | __GFP_ZERO); #else ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER - PAGE_SHIFT); @@ -90,7 +102,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) void pgd_free(struct mm_struct *mm, pgd_t *pgd) { #ifndef CONFIG_PPC_4K_PAGES - kfree((void *)pgd); + kmem_cache_free(pgtable_cache, (void *)pgd); #else free_pages((unsigned long)pgd, PGDIR_ORDER - PAGE_SHIFT); #endif -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 04/11] powerpc/8xx: Take benefit of aligned PGDIR
L1 base address is now aligned so we can insert L1 index into r11 directly and then preserve r10 Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 34 +++--- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 2c329f1..ae05f28 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -326,16 +326,15 @@ InstructionTLBMiss: ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: #endif - /* Extract level 1 index */ - rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwzxr11, r10, r11 /* Get the level 1 entry */ - rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ + /* Insert level 1 index */ + rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 + lwz r11, 0(r11) /* Get the level 1 entry */ /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */ - mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Extract level 2 index */ - rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzxr10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP @@ -380,13 +379,12 @@ DataStoreTLBMiss: lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: - /* Extract level 1 index */ - rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwzxr11, r10, r11 /* Get the level 1 entry */ + /* Insert level 1 index */ + rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 + lwz r11, 0(r11) /* Get the level 1 entry */ /* We have a pte table, so load fetch the pte from the table. */ - mfspr r10, SPRN_MD_EPN/* Get address of fault */ /* Extract level 2 index */ rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ @@ -515,16 +513,14 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq-3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l - /* Extract level 1 index */ -3: rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwzxr11, r10, r11 /* Get the level 1 entry */ - rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ - mfspr r11, SPRN_SRR0 /* Get effective address of fault */ - /* Extract level 2 index */ - rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 - lwzxr11, r10, r11 /* Get the pte */ + /* Insert level 1 index */ +3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 + lwz r11, 0(r11) /* Get the level 1 entry */ + rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ + /* Insert level 2 index */ + rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + lwz r11, 0(r11) /* Get the pte */ /* concat physical page address(r11) and page offset(r10) */ - mfspr r10, SPRN_SRR0 rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 05/11] powerpc/8xx: Optimise access to swapper_pg_dir
All accessed to PGD entries are done via 0(r11). By using lower part of swapper_pg_dir as load index to r11, we can remove the ori instruction. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 22 ++ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ae05f28..aa45225 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -322,13 +322,12 @@ InstructionTLBMiss: mfspr r11, SPRN_M_TW /* Get level 1 table base address */ #ifdef CONFIG_MODULES beq 3f - lis r11, (swapper_pg_dir-PAGE_OFFSET)@h - ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: #endif /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwz r11, 0(r11) /* Get the level 1 entry */ + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the level 1 entry */ /* Load the MI_TWC with the attributes for this "segment." */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */ @@ -376,12 +375,11 @@ DataStoreTLBMiss: andis. r11, r10, 0x8000 mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq 3f - lis r11, (swapper_pg_dir-PAGE_OFFSET)@h - ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwz r11, 0(r11) /* Get the level 1 entry */ + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the level 1 entry */ /* We have a pte table, so load fetch the pte from the table. */ @@ -510,12 +508,11 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000/* Address >= 0x8000 */ mfspr r11, SPRN_M_TW /* Get level 1 table base address */ - beq-3f /* Branch if user space */ - lis r11, (swapper_pg_dir-PAGE_OFFSET)@h - ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l + beq 3f + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 - lwz r11, 0(r11) /* Get the level 1 entry */ + lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the level 1 entry */ rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */ /* Insert level 2 index */ rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 @@ -670,8 +667,7 @@ start_here: * init's THREAD like the context switch code does, but this is * easier..until someone changes init's static structures. */ - lis r6, swapper_pg_dir@h - ori r6, r6, swapper_pg_dir@l + lis r6, swapper_pg_dir@ha tophys(r6,r6) #ifdef CONFIG_8xx_CPU6 lis r4, cpu6_errata_word@h @@ -850,6 +846,8 @@ _GLOBAL(set_context) stw r4, 0x4(r5) #endif + li r5, (swapper_pg_dir-PAGE_OFFSET)@l + sub r4, r4, r5 #ifdef CONFIG_8xx_CPU6 lis r6, cpu6_errata_word@h ori r6, r6, cpu6_errata_word@l -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 06/11] powerpc/8xx: Remove duplicated code in set_context()
Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 10 -- 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index aa45225..b227902e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -848,23 +848,21 @@ _GLOBAL(set_context) li r5, (swapper_pg_dir-PAGE_OFFSET)@l sub r4, r4, r5 + tophys (r4, r4) #ifdef CONFIG_8xx_CPU6 lis r6, cpu6_errata_word@h ori r6, r6, cpu6_errata_word@l - tophys (r4, r4) li r7, 0x3f80 stw r7, 12(r6) lwz r7, 12(r6) +#endif mtspr SPRN_M_TW, r4 /* Update MMU base address */ +#ifdef CONFIG_8xx_CPU6 li r7, 0x3380 stw r7, 12(r6) lwz r7, 12(r6) -mtspr SPRN_M_CASID, r3 /* Update context */ -#else -mtspr SPRN_M_CASID,r3/* Update context */ - tophys (r4, r4) - mtspr SPRN_M_TW, r4 /* and pgd */ #endif +mtspr SPRN_M_CASID, r3 /* Update context */ SYNC blr -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 07/11] powerpc/8xx: macro for handling CPU15 errata
Having a macro will help keep clear code. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index b227902e..b3f3cb5 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -297,6 +297,17 @@ SystemCall: * We have to use the MD_xxx registers for the tablewalk because the * equivalent MI_xxx registers only perform the attribute functions. */ + +#ifdef CONFIG_8xx_CPU15 +#define DO_8xx_CPU15(tmp, addr)\ + additmp, addr, PAGE_SIZE; \ + tlbie tmp;\ + additmp, addr, PAGE_SIZE; \ + tlbie tmp +#else +#define DO_8xx_CPU15(tmp, addr) +#endif + InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 mtspr SPRN_DAR, r3 @@ -304,12 +315,7 @@ InstructionTLBMiss: EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ -#ifdef CONFIG_8xx_CPU15 - addir11, r10, PAGE_SIZE - tlbie r11 - addir11, r10, -PAGE_SIZE - tlbie r11 -#endif + DO_8xx_CPU15(r11, r10) /* If we are faulting a kernel address, we have to use the * kernel page tables. -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 08/11] powerpc/8xx: Handle CR out of exception PROLOG/EPILOG
In order to be able to reduce scope during which CR is saved, we take CR saving/restoring out of exception PROLOG and EPILOG Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index b3f3cb5..c89aed9 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -116,13 +116,13 @@ turn_on_mmu: */ #define EXCEPTION_PROLOG \ EXCEPTION_PROLOG_0; \ + mfcrr10;\ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 #define EXCEPTION_PROLOG_0 \ mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcrr10 + mtspr SPRN_SPRG_SCRATCH1,r11 #define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ @@ -162,7 +162,6 @@ turn_on_mmu: * Exception exit code. */ #define EXCEPTION_EPILOG_0 \ - mtcrr10;\ mfspr r10,SPRN_SPRG_SCRATCH0; \ mfspr r11,SPRN_SPRG_SCRATCH1 @@ -313,6 +312,7 @@ InstructionTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 + mfcrr10 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ DO_8xx_CPU15(r11, r10) @@ -363,6 +363,7 @@ InstructionTLBMiss: mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 + mtcrr10 EXCEPTION_EPILOG_0 rfi @@ -372,6 +373,7 @@ DataStoreTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 + mfcrr10 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_MD_EPN @@ -440,6 +442,7 @@ DataStoreTLBMiss: #endif mtspr SPRN_DAR, r11 /* Tag DAR */ mfspr r10, SPRN_SPRG_SCRATCH2 + mtcrr10 EXCEPTION_EPILOG_0 rfi @@ -465,6 +468,7 @@ InstructionTLBError: . = 0x1400 DataTLBError: EXCEPTION_PROLOG_0 + mfcrr10 mfspr r11, SPRN_DAR cmpwi cr0, r11, RPN_PATTERN -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 09/11] powerpc/8xx: dont save CR in SCRATCH registers
CR only needs to be preserved when checking if we are handling a kernel address. So we can preserve CR in a register: - In ITLBMiss, check is done only when CONFIG_MODULES is defined. Otherwise we don't need to do anything at all with CR. - If CONFIG_8xx_CPU6 is defined, we have r3 available for saving CR - Otherwise, we use r10, then we reload SRR0/MD_EPN into r10 when CR is restored Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 53 +- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c89aed9..a073918 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -308,14 +308,10 @@ SystemCall: #endif InstructionTLBMiss: + EXCEPTION_PROLOG_0 #ifdef CONFIG_8xx_CPU6 mtspr SPRN_DAR, r3 #endif - EXCEPTION_PROLOG_0 - mfcrr10 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_SRR0 /* Get effective address of fault */ - DO_8xx_CPU15(r11, r10) /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -323,14 +319,33 @@ InstructionTLBMiss: #ifdef CONFIG_MODULES /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ +#ifdef CONFIG_8xx_CPU6 + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ + DO_8xx_CPU15(r11, r10) + mfcrr3 andis. r11, r10, 0x8000/* Address >= 0x8000 */ +#else + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + DO_8xx_CPU15(r10, r11) + mfcrr10 + andis. r11, r11, 0x8000/* Address >= 0x8000 */ #endif mfspr r11, SPRN_M_TW /* Get level 1 table base address */ -#ifdef CONFIG_MODULES beq 3f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: +#ifdef CONFIG_8xx_CPU6 + mtcrr3 +#else + mtcrr10 + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #endif +#else /* CONFIG_MODULES */ + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ + DO_8xx_CPU15(r11, r10) + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ +#endif /* CONFIG_MODULES */ + /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the level 1 entry */ @@ -362,29 +377,37 @@ InstructionTLBMiss: mfspr r3, SPRN_DAR mtspr SPRN_DAR, r11 /* Tag DAR */ #endif - mfspr r10, SPRN_SPRG_SCRATCH2 - mtcrr10 EXCEPTION_EPILOG_0 rfi . = 0x1200 DataStoreTLBMiss: -#ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 -#endif EXCEPTION_PROLOG_0 - mfcrr10 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_MD_EPN /* If we are faulting a kernel address, we have to use the * kernel page tables. */ +#ifdef CONFIG_8xx_CPU6 + mtspr SPRN_DAR, r3 + mfcrr3 + mfspr r10, SPRN_MD_EPN andis. r11, r10, 0x8000 +#else + mfcrr10 + mfspr r11, SPRN_MD_EPN + andis. r11, r11, 0x8000 +#endif mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq 3f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: +#ifdef CONFIG_8xx_CPU6 + mtcrr3 +#else + mtcrr10 + mfspr r10, SPRN_MD_EPN +#endif + /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the level 1 entry */ @@ -441,8 +464,6 @@ DataStoreTLBMiss: mfspr r3, SPRN_DAR #endif mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r10, SPRN_SPRG_SCRATCH2 - mtcrr10 EXCEPTION_EPILOG_0 rfi -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 10/11] powerpc/8xx: Use SPRG2 instead of DAR for saving r3
We now have SPRG2 available as in it not used anymore for saving CR, so we don't need to crash DAR anymore for saving r3 for CPU6 ERRATA handling. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index a073918..dbe110e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -310,7 +310,7 @@ SystemCall: InstructionTLBMiss: EXCEPTION_PROLOG_0 #ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 + mtspr SPRN_SPRG_SCRATCH2, r3 #endif /* If we are faulting a kernel address, we have to use the @@ -374,8 +374,7 @@ InstructionTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 rfi @@ -388,7 +387,7 @@ DataStoreTLBMiss: * kernel page tables. */ #ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 + mtspr SPRN_SPRG_SCRATCH2, r3 mfcrr3 mfspr r10, SPRN_MD_EPN andis. r11, r10, 0x8000 @@ -461,7 +460,7 @@ DataStoreTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR + mfspr r3, SPRN_SPRG_SCRATCH2 #endif mtspr SPRN_DAR, r11 /* Tag DAR */ EXCEPTION_EPILOG_0 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 11/11] powerpc/8xx: Add support for TASK_SIZE greater than 0x80000000
By default, TASK_SIZE is set to 0x8000 for PPC_8xx, which is most likely sufficient for most cases. However, kernel configuration allows to set TASK_SIZE to another value, so the 8xx shall handle it. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 29 + 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index dbe110e..d380658 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -48,6 +48,19 @@ mtspr spr, reg #endif +/* Macro to test if an address is a kernel address */ +#if CONFIG_TASK_SIZE <= 0x8000 +#define IS_KERNEL(tmp, addr) \ + andis. tmp, addr, 0x8000 /* Address >= 0x8000 */ +#define BRANCH_UNLESS_KERNEL(label)beq label +#else +#define IS_KERNEL(tmp, addr) \ + rlwinm tmp, addr, 16, 16, 31; \ + cmpli cr0, tmp, PAGE_OFFSET >> 16 +#define BRANCH_UNLESS_KERNEL(label)blt label +#endif + + /* * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. @@ -323,15 +336,15 @@ InstructionTLBMiss: mfspr r10, SPRN_SRR0 /* Get effective address of fault */ DO_8xx_CPU15(r11, r10) mfcrr3 - andis. r11, r10, 0x8000/* Address >= 0x8000 */ + IS_KERNEL(r11, r10) #else mfspr r11, SPRN_SRR0 /* Get effective address of fault */ DO_8xx_CPU15(r10, r11) mfcrr10 - andis. r11, r11, 0x8000/* Address >= 0x8000 */ + IS_KERNEL(r11, r11) #endif mfspr r11, SPRN_M_TW /* Get level 1 table base address */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: #ifdef CONFIG_8xx_CPU6 @@ -390,14 +403,14 @@ DataStoreTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r3 mfcrr3 mfspr r10, SPRN_MD_EPN - andis. r11, r10, 0x8000 + IS_KERNEL(r11, r10) #else mfcrr10 mfspr r11, SPRN_MD_EPN - andis. r11, r11, 0x8000 + IS_KERNEL(r11, r11) #endif mfspr r11, SPRN_M_TW /* Get level 1 table base address */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: #ifdef CONFIG_8xx_CPU6 @@ -536,9 +549,9 @@ FixupDAR:/* Entry point for dcbx workaround. */ mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 - andis. r11, r10, 0x8000/* Address >= 0x8000 */ + IS_KERNEL(r11, r10) mfspr r11, SPRN_M_TW /* Get level 1 table base address */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH RESEND v5 0/5] powerpc8xx: Further optimisation of TLB handling
This patchset provides a further optimisation of TLB handling in the 8xx. Changes are: - Not saving registers like CR when not needed - Adding support to any TASK_SIZE Only the last patch of the set is changed compared to v4 Resending with proper From: this time. Christophe Leroy (5): powerpc/8xx: macro for handling CPU15 errata powerpc/8xx: Handle CR out of exception PROLOG/EPILOG powerpc/8xx: dont save CR in SCRATCH registers powerpc/8xx: Use SPRG2 instead of DAR for saving r3 powerpc/8xx: Add support for TASK_SIZE greater than 0x8000 arch/powerpc/kernel/head_8xx.S | 79 +++--- 1 file changed, 51 insertions(+), 28 deletions(-) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH RESEND v5 1/5] powerpc/8xx: macro for handling CPU15 errata
Having a macro will help keep clear code. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 9b53fe1..1279018 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -297,6 +297,17 @@ SystemCall: * We have to use the MD_xxx registers for the tablewalk because the * equivalent MI_xxx registers only perform the attribute functions. */ + +#ifdef CONFIG_8xx_CPU15 +#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr) \ + additmp, addr, PAGE_SIZE; \ + tlbie tmp;\ + additmp, addr, -PAGE_SIZE; \ + tlbie tmp +#else +#define INVALIDATE_ADJACENT_PAGES_CPU15(tmp, addr) +#endif + InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 mtspr SPRN_DAR, r3 @@ -304,12 +315,7 @@ InstructionTLBMiss: EXCEPTION_PROLOG_0 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ -#ifdef CONFIG_8xx_CPU15 - addir11, r10, PAGE_SIZE - tlbie r11 - addir11, r10, -PAGE_SIZE - tlbie r11 -#endif + INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) /* If we are faulting a kernel address, we have to use the * kernel page tables. -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH RESEND v5 2/5] powerpc/8xx: Handle CR out of exception PROLOG/EPILOG
In order to be able to reduce scope during which CR is saved, we take CR saving/restoring out of exception PROLOG and EPILOG Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 1279018..5a69c5e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -116,13 +116,13 @@ turn_on_mmu: */ #define EXCEPTION_PROLOG \ EXCEPTION_PROLOG_0; \ + mfcrr10;\ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 #define EXCEPTION_PROLOG_0 \ mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcrr10 + mtspr SPRN_SPRG_SCRATCH1,r11 #define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ @@ -162,7 +162,6 @@ turn_on_mmu: * Exception exit code. */ #define EXCEPTION_EPILOG_0 \ - mtcrr10;\ mfspr r10,SPRN_SPRG_SCRATCH0; \ mfspr r11,SPRN_SPRG_SCRATCH1 @@ -313,6 +312,7 @@ InstructionTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 + mfcrr10 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) @@ -363,6 +363,7 @@ InstructionTLBMiss: mtspr SPRN_DAR, r11 /* Tag DAR */ #endif mfspr r10, SPRN_SPRG_SCRATCH2 + mtcrr10 EXCEPTION_EPILOG_0 rfi @@ -372,6 +373,7 @@ DataStoreTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 + mfcrr10 mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_MD_EPN @@ -437,6 +439,7 @@ DataStoreTLBMiss: #endif mtspr SPRN_DAR, r11 /* Tag DAR */ mfspr r10, SPRN_SPRG_SCRATCH2 + mtcrr10 EXCEPTION_EPILOG_0 rfi @@ -462,6 +465,7 @@ InstructionTLBError: . = 0x1400 DataTLBError: EXCEPTION_PROLOG_0 + mfcrr10 mfspr r11, SPRN_DAR cmpwi cr0, r11, RPN_PATTERN -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH RESEND v5 3/5] powerpc/8xx: dont save CR in SCRATCH registers
CR only needs to be preserved when checking if we are handling a kernel address. So we can preserve CR in a register: - In ITLBMiss, check is done only when CONFIG_MODULES is defined. Otherwise we don't need to do anything at all with CR. - We use r10, then we reload SRR0/MD_EPN into r10 when CR is restored Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 29 +++-- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 5a69c5e..150d03f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -312,10 +312,6 @@ InstructionTLBMiss: mtspr SPRN_DAR, r3 #endif EXCEPTION_PROLOG_0 - mfcrr10 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_SRR0 /* Get effective address of fault */ - INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -323,13 +319,20 @@ InstructionTLBMiss: #ifdef CONFIG_MODULES /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ - andis. r11, r10, 0x8000/* Address >= 0x8000 */ -#endif + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) + mfcrr10 + andis. r11, r11, 0x8000/* Address >= 0x8000 */ mfspr r11, SPRN_M_TW /* Get level 1 table */ -#ifdef CONFIG_MODULES beq 3f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: + mtcrr10 + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ +#else + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ + INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) + mfspr r11, SPRN_M_TW /* Get level 1 table base address */ #endif /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 @@ -362,8 +365,6 @@ InstructionTLBMiss: mfspr r3, SPRN_DAR mtspr SPRN_DAR, r11 /* Tag DAR */ #endif - mfspr r10, SPRN_SPRG_SCRATCH2 - mtcrr10 EXCEPTION_EPILOG_0 rfi @@ -374,17 +375,19 @@ DataStoreTLBMiss: #endif EXCEPTION_PROLOG_0 mfcrr10 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_MD_EPN /* If we are faulting a kernel address, we have to use the * kernel page tables. */ - andis. r11, r10, 0x8000 + mfspr r11, SPRN_MD_EPN + andis. r11, r11, 0x8000 mfspr r11, SPRN_M_TW /* Get level 1 table */ beq 3f lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: + mtcrr10 + mfspr r10, SPRN_MD_EPN + /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11)/* Get the level 1 entry */ @@ -438,8 +441,6 @@ DataStoreTLBMiss: mfspr r3, SPRN_DAR #endif mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r10, SPRN_SPRG_SCRATCH2 - mtcrr10 EXCEPTION_EPILOG_0 rfi -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH RESEND v5 4/5] powerpc/8xx: Use SPRG2 instead of DAR for saving r3
We now have SPRG2 available as in it not used anymore for saving CR, so we don't need to crash DAR anymore for saving r3 for CPU6 ERRATA handling. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 150d03f..ba2dc53 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -309,7 +309,7 @@ SystemCall: InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 + mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 @@ -362,8 +362,7 @@ InstructionTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR - mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 rfi @@ -371,7 +370,7 @@ InstructionTLBMiss: . = 0x1200 DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 - mtspr SPRN_DAR, r3 + mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 mfcrr10 @@ -438,7 +437,7 @@ DataStoreTLBMiss: /* Restore registers */ #ifdef CONFIG_8xx_CPU6 - mfspr r3, SPRN_DAR + mfspr r3, SPRN_SPRG_SCRATCH2 #endif mtspr SPRN_DAR, r11 /* Tag DAR */ EXCEPTION_EPILOG_0 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH RESEND v5 5/5] powerpc/8xx: Add support for TASK_SIZE greater than 0x80000000
By default, TASK_SIZE is set to 0x8000 for PPC_8xx, which is most likely sufficient for most cases. However, kernel configuration allows to set TASK_SIZE to another value, so the 8xx shall handle it. This patch also takes into account the case of PAGE_OFFSET lower than 0x8000, allthought most of the time it is equal to 0xC000 Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 25 +++-- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ba2dc53..c640bbb 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -48,6 +48,19 @@ mtspr spr, reg #endif +/* Macro to test if an address is a kernel address */ +#if CONFIG_TASK_SIZE <= 0x8000 && CONFIG_PAGE_OFFSET >= 0x8000 +#define IS_KERNEL(tmp, addr) \ + andis. tmp, addr, 0x8000 /* Address >= 0x8000 */ +#define BRANCH_UNLESS_KERNEL(label)beq label +#else +#define IS_KERNEL(tmp, addr) \ + rlwinm tmp, addr, 16, 16, 31; \ + cmpli cr0, tmp, PAGE_OFFSET >> 16 +#define BRANCH_UNLESS_KERNEL(label)blt label +#endif + + /* * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. @@ -322,9 +335,9 @@ InstructionTLBMiss: mfspr r11, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mfcrr10 - andis. r11, r11, 0x8000/* Address >= 0x8000 */ + IS_KERNEL(r11, r11) mfspr r11, SPRN_M_TW /* Get level 1 table */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: mtcrr10 @@ -379,9 +392,9 @@ DataStoreTLBMiss: * kernel page tables. */ mfspr r11, SPRN_MD_EPN - andis. r11, r11, 0x8000 + IS_KERNEL(r11, r11) mfspr r11, SPRN_M_TW /* Get level 1 table */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: mtcrr10 @@ -513,9 +526,9 @@ FixupDAR:/* Entry point for dcbx workaround. */ mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 - andis. r11, r10, 0x8000/* Address >= 0x8000 */ + IS_KERNEL(r11, r10) mfspr r11, SPRN_M_TW /* Get level 1 table */ - beq 3f + BRANCH_UNLESS_KERNEL(3f) lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha /* Insert level 1 index */ 3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/3] powerpc/8xx: mark _PAGE_SHARED all types of kernel pages
All kernel pages have to be marked as shared in order to not perform CASID verification. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/pte-8xx.h | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index eb6edb4..c8aacad 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -63,7 +63,12 @@ /* We need to add _PAGE_SHARED to kernel pages */ #define _PAGE_KERNEL_RO(_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO) -#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_RO | _PAGE_KNLRO) +#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO | \ +_PAGE_EXEC) +#define _PAGE_KERNEL_RW(_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ +_PAGE_HWWRITE) +#define _PAGE_KERNEL_RWX (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ +_PAGE_HWWRITE | _PAGE_EXEC) #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_PTE_8xx_H */ -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/3] powerpc/8xx: Handle PAGE_USER via APG bits
Use of APG for handling PAGE_USER. All pages PP exec bits are set to either 000 or 011, which means respectively RW for Supervisor and no access for User, or RO for Supervisor and no access for user. Then we use the APG to say whether accesses are according to Page rules or "all Supervisor" rules (Access to all) Therefore, we define 2 APG groups corresponding to _PAGE_USER. Mx_AP are initialised as follows: GP0 => No user => 01 (all accesses performed according to page definition) GP1 => User => 00 (all accesses performed as supervisor according to page definition) This removes the special 8xx handling in pte_update() Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/pgtable-ppc32.h | 19 --- arch/powerpc/include/asm/pte-8xx.h | 27 +-- arch/powerpc/kernel/head_8xx.S | 21 - 3 files changed, 21 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index b3e9a3e..d280fa2 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -170,24 +170,6 @@ static inline unsigned long pte_update(pte_t *p, #ifdef PTE_ATOMIC_UPDATES unsigned long old, tmp; -#ifdef CONFIG_PPC_8xx - unsigned long tmp2; - - __asm__ __volatile__("\ -1: lwarx %0,0,%4\n\ - andc%1,%0,%5\n\ - or %1,%1,%6\n\ - /* 0x200 == Extended encoding, bit 22 */ \ - /* Bit 22 has to be 1 when _PAGE_USER is unset and _PAGE_RO is set */ \ - rlwimi %1,%1,32-1,0x200\n /* get _PAGE_RO */ \ - rlwinm %3,%1,32-2,0x200\n /* get _PAGE_USER */ \ - andc%1,%1,%3\n\ - stwcx. %1,0,%4\n\ - bne-1b" - : "=&r" (old), "=&r" (tmp), "=m" (*p), "=&r" (tmp2) - : "r" (p), "r" (clr), "r" (set), "m" (*p) - : "cc" ); -#else /* CONFIG_PPC_8xx */ __asm__ __volatile__("\ 1: lwarx %0,0,%3\n\ andc%1,%0,%4\n\ @@ -198,7 +180,6 @@ static inline unsigned long pte_update(pte_t *p, : "=&r" (old), "=&r" (tmp), "=m" (*p) : "r" (p), "r" (clr), "r" (set), "m" (*p) : "cc" ); -#endif /* CONFIG_PPC_8xx */ #else /* PTE_ATOMIC_UPDATES */ unsigned long old = pte_val(*p); *p = __pte((old & ~clr) | set); diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index c8aacad..7926d4e 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -35,36 +35,27 @@ #define _PAGE_SPECIAL 0x0008 /* SW entry, forced to 0 by the TLB miss */ #define _PAGE_DIRTY0x0100 /* C: page changed */ -/* These 4 software bits must be masked out when the entry is loaded - * into the TLB, 1 SW bit left(0x0080). +/* These 4 software bits must be masked out when the L2 entry is loaded + * into the TLB. */ -#define _PAGE_GUARDED 0x0010 /* software: guarded access */ -#define _PAGE_ACCESSED 0x0020 /* software: page referenced */ -#define _PAGE_WRITETHRU0x0040 /* software: caching is write through */ +#define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ +#define _PAGE_USER 0x0020 /* Copied to L1 APG lsb */ +#define _PAGE_ACCESSED 0x0040 /* software: page referenced */ +#define _PAGE_WRITETHRU0x0080 /* software: caching is write through */ -/* Setting any bits in the nibble with the follow two controls will - * require a TLB exception handler change. It is assumed unused bits - * are always zero. - */ -#define _PAGE_RO 0x0400 /* lsb PP bits */ -#define _PAGE_USER 0x0800 /* msb PP bits */ -/* set when _PAGE_USER is unset and _PAGE_RO is set */ -#define _PAGE_KNLRO0x0200 +#define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ #define _PMD_PRESENT 0x0001 #define _PMD_BAD 0x0ff0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c -#define _PTE_NONE_MASK _PAGE_KNLRO - /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 /* We need to add _PAGE_SHARED to kernel pages */ -#define _PAGE_KERNEL_RO(_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO) -#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_KNLRO | \ -_PAGE_EXEC) +#define _PAGE_KERNEL_RO(_PAGE_SHARED | _PAGE_RO) +#define _PAGE_KERNEL_ROX (_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC) #define _PAGE_KERNEL_RW(_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ _PAGE_HWWRITE) #define _PAGE_KERNEL_RWX (_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \ diff --git a/arch/powerpc/kernel/head_8
[PATCH 3/3] powerpc/8xx: Implementation of PAGE_EXEC
This patch implements PAGE_EXEC capability on the 8xx. All pages PP exec bits are set to 000, which means Execute for Supervisor and no Execute for User. Then we use the APG to say whether accesses are according to Page rules, "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone) Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER. MI_AP is initialised as follows: GP0 (00) => Not User, no exec => 11 (all accesses performed as user) GP1 (01) => User but no exec => 11 (all accesses performed as user) GP2 (10) => Not User, exec => 01 (rights according to page definition) GP3 (11) => User, exec => 00 (all accesses performed as supervisor) Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/cputable.h | 2 +- arch/powerpc/include/asm/mmu-8xx.h | 26 ++ arch/powerpc/include/asm/pte-8xx.h | 3 ++- arch/powerpc/kernel/head_8xx.S | 12 +--- 4 files changed, 38 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index daa5af9..c9aa2db 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -360,7 +360,7 @@ extern const char *powerpc_base_platform; CPU_FTR_USE_TB | CPU_FTR_MAYBE_CAN_NAP | \ CPU_FTR_COMMON | CPU_FTR_FPU_UNAVAILABLE) #define CPU_FTRS_CLASSIC32 (CPU_FTR_COMMON | CPU_FTR_USE_TB) -#define CPU_FTRS_8XX (CPU_FTR_USE_TB) +#define CPU_FTRS_8XX (CPU_FTR_USE_TB | CPU_FTR_NOEXECUTE) #define CPU_FTRS_40X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) #define CPU_FTRS_44X (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE) #define CPU_FTRS_440x6 (CPU_FTR_USE_TB | CPU_FTR_NODSISRALIGN | CPU_FTR_NOEXECUTE | \ diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index d41200c..1407034 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -27,6 +27,19 @@ #define MI_Ks 0x8000 /* Should not be set */ #define MI_Kp 0x4000 /* Should always be set */ +/* + * All pages PP exec bits are set to 000, which means Execute for Supervisor + * and no Execute for User. + * Then we use the APG to say whether accesses are according to Page rules, + * "all Supervisor" rules (Exec for all) and "all User" rules (Exec for noone) + * Therefore, we define 4 APG groups. msb is _PAGE_EXEC, lsb is _PAGE_USER + * 0 (00) => Not User, no exec => 11 (all accesses performed as user) + * 1 (01) => User but no exec => 11 (all accesses performed as user) + * 2 (10) => Not User, exec => 01 (rights according to page definition) + * 3 (11) => User, exec => 00 (all accesses performed as supervisor) + */ +#define MI_APG_INIT0xf4ff + /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MI_RPN is written, bits in * this register are used to create the TLB entry. @@ -87,6 +100,19 @@ #define MD_Ks 0x8000 /* Should not be set */ #define MD_Kp 0x4000 /* Should always be set */ +/* + * All pages PP exec bits are set to either 000 or 011, which means respectively + * RW for Supervisor and no access for User, or RO for Supervisor and no access + * for user. + * Then we use the APG to say whether accesses are according to Page rules or + * "all Supervisor" rules (Access to all) + * Therefore, we define 2 APG groups. lsb is _PAGE_USER + * 0 => No user => 01 (all accesses performed according to page definition) + * 1 => User => 00 (all accesses performed as supervisor + * according to page definition) + */ +#define MD_APG_INIT0x4fff + /* The effective page number register. When read, contains the information * about the last instruction TLB miss. When MD_RPN is written, bits in * this register are used to create the TLB entry. diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index 7926d4e..eae9b05 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -40,8 +40,9 @@ */ #define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */ #define _PAGE_USER 0x0020 /* Copied to L1 APG lsb */ -#define _PAGE_ACCESSED 0x0040 /* software: page referenced */ +#define _PAGE_EXEC 0x0040 /* Copied to L1 APG */ #define _PAGE_WRITETHRU0x0080 /* software: caching is write through */ +#define _PAGE_ACCESSED 0x0800 /* software: page referenced */ #define _PAGE_RO 0x0600 /* Supervisor RO, User no access */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 508c645..29a5c1a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -369,7 +369,7 @@ InstructionTLBMiss:
[PATCH 0/3] powerpc/8xx: Implementation of execute protection
This patchset implements execute protection on the 8xx. It also simplifies the handling of PAGE_USER and PAGE_RO, and adds a small fix to the kernel pages definition. This patchset goes on to of my previous patchset named "[v5] powerpc8xx: Further optimisation of TLB handling" Christophe Leroy (3): powerpc/8xx: mark _PAGE_SHARED all types of kernel pages powerpc/8xx: Handle PAGE_USER via APG bits powerpc/8xx: Implementation of PAGE_EXEC arch/powerpc/include/asm/cputable.h | 2 +- arch/powerpc/include/asm/mmu-8xx.h | 26 ++ arch/powerpc/include/asm/pgtable-ppc32.h | 19 --- arch/powerpc/include/asm/pte-8xx.h | 31 ++- arch/powerpc/kernel/head_8xx.S | 31 --- 5 files changed, 61 insertions(+), 48 deletions(-) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2] spi: fsl-spi: use of_iomap() to map parameter ram on CPM1
On CPM2, the SPI parameter RAM is dynamically allocated in the dualport RAM whereas in CPM1, it is statically allocated to a default address with capability to relocate it somewhere else via the use of CPM micropatch. The address of the parameter RAM is given by the boot loader and expected to be mapped via of_iomap() In the current implementation, in function fsl_spi_cpm_get_pram() there is a confusion between the SPI_BASE register and the base of the SPI parameter RAM. Fortunatly, it is working properly with MPC866 and MPC885 because they do set SPI_BASE, but on MPC860 and other old MPC8xx that doesn't set SPI_BASE, pram_ofs is not properly set. Also, the parameter RAM is not properly mapped with of_iomap() as it should but still gets accessible by chance through the full RAM which is mapped from somewhere else. This patch applies to the SPI driver the same principle as for the CPM UART: when the CPM is of type CPM1, we simply do an of_iomap() of the area provided via the device tree. Signed-off-by: Christophe Leroy --- v2: Use devm_ioremap_resource() instead of_iomap() drivers/spi/spi-fsl-cpm.c | 35 ++- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c index e85ab1c..4e5c945 100644 --- a/drivers/spi/spi-fsl-cpm.c +++ b/drivers/spi/spi-fsl-cpm.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "spi-fsl-cpm.h" #include "spi-fsl-lib.h" @@ -264,17 +265,6 @@ static unsigned long fsl_spi_cpm_get_pram(struct mpc8xxx_spi *mspi) if (mspi->flags & SPI_CPM2) { pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64); out_be16(spi_base, pram_ofs); - } else { - struct spi_pram __iomem *pram = spi_base; - u16 rpbase = in_be16(&pram->rpbase); - - /* Microcode relocation patch applied? */ - if (rpbase) { - pram_ofs = rpbase; - } else { - pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64); - out_be16(spi_base, pram_ofs); - } } iounmap(spi_base); @@ -287,7 +277,6 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi) struct device_node *np = dev->of_node; const u32 *iprop; int size; - unsigned long pram_ofs; unsigned long bds_ofs; if (!(mspi->flags & SPI_CPM_MODE)) @@ -314,8 +303,21 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi) } } - pram_ofs = fsl_spi_cpm_get_pram(mspi); - if (IS_ERR_VALUE(pram_ofs)) { + if (mspi->flags & SPI_CPM1) { + struct resource *res; + + res = platform_get_resource(to_platform_device(dev), + IORESOURCE_MEM, 1); + mspi->pram = devm_ioremap_resource(dev, res); + } else { + unsigned long pram_ofs = fsl_spi_cpm_get_pram(mspi); + + if (IS_ERR_VALUE(pram_ofs)) + mspi->pram = NULL; + else + mspi->pram = cpm_muram_addr(pram_ofs); + } + if (mspi->pram == NULL) { dev_err(dev, "can't allocate spi parameter ram\n"); goto err_pram; } @@ -341,8 +343,6 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi) goto err_dummy_rx; } - mspi->pram = cpm_muram_addr(pram_ofs); - mspi->tx_bd = cpm_muram_addr(bds_ofs); mspi->rx_bd = cpm_muram_addr(bds_ofs + sizeof(*mspi->tx_bd)); @@ -370,7 +370,8 @@ err_dummy_rx: err_dummy_tx: cpm_muram_free(bds_ofs); err_bds: - cpm_muram_free(pram_ofs); + if (!(mspi->flags & SPI_CPM1)) + cpm_muram_free(cpm_muram_offset(mspi->pram)); err_pram: fsl_spi_free_dummy_rx(); return -ENOMEM; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3] spi: fsl-spi: use devm_ioremap_resource() to map parameter ram on CPM1
On CPM2, the SPI parameter RAM is dynamically allocated in the dualport RAM whereas in CPM1, it is statically allocated to a default address with capability to relocate it somewhere else via the use of CPM micropatch. The address of the parameter RAM is given by the boot loader and expected to be mapped via devm_ioremap_resource() In the current implementation, in function fsl_spi_cpm_get_pram() there is a confusion between the SPI_BASE register and the base of the SPI parameter RAM. Fortunatly, it is working properly with MPC866 and MPC885 because they do set SPI_BASE, but on MPC860 and other old MPC8xx that doesn't set SPI_BASE, pram_ofs is not properly set. Also, the parameter RAM is not properly mapped with devm_ioremap_resource() as it should but still gets accessible by chance through the full RAM which is mapped from somewhere else. This patch applies to the SPI driver the same principle as for the CPM UART: when the CPM is of type CPM1, we simply do an devm_ioremap_resource() of the area provided via the device tree. Signed-off-by: Christophe Leroy --- v2: Use devm_ioremap_resource() instead of_iomap() v3: Replaced of_iomap() by devm_ioremap_resource() in the patch text drivers/spi/spi-fsl-cpm.c | 35 ++- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c index e85ab1c..4e5c945 100644 --- a/drivers/spi/spi-fsl-cpm.c +++ b/drivers/spi/spi-fsl-cpm.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "spi-fsl-cpm.h" #include "spi-fsl-lib.h" @@ -264,17 +265,6 @@ static unsigned long fsl_spi_cpm_get_pram(struct mpc8xxx_spi *mspi) if (mspi->flags & SPI_CPM2) { pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64); out_be16(spi_base, pram_ofs); - } else { - struct spi_pram __iomem *pram = spi_base; - u16 rpbase = in_be16(&pram->rpbase); - - /* Microcode relocation patch applied? */ - if (rpbase) { - pram_ofs = rpbase; - } else { - pram_ofs = cpm_muram_alloc(SPI_PRAM_SIZE, 64); - out_be16(spi_base, pram_ofs); - } } iounmap(spi_base); @@ -287,7 +277,6 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi) struct device_node *np = dev->of_node; const u32 *iprop; int size; - unsigned long pram_ofs; unsigned long bds_ofs; if (!(mspi->flags & SPI_CPM_MODE)) @@ -314,8 +303,21 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi) } } - pram_ofs = fsl_spi_cpm_get_pram(mspi); - if (IS_ERR_VALUE(pram_ofs)) { + if (mspi->flags & SPI_CPM1) { + struct resource *res; + + res = platform_get_resource(to_platform_device(dev), + IORESOURCE_MEM, 1); + mspi->pram = devm_ioremap_resource(dev, res); + } else { + unsigned long pram_ofs = fsl_spi_cpm_get_pram(mspi); + + if (IS_ERR_VALUE(pram_ofs)) + mspi->pram = NULL; + else + mspi->pram = cpm_muram_addr(pram_ofs); + } + if (mspi->pram == NULL) { dev_err(dev, "can't allocate spi parameter ram\n"); goto err_pram; } @@ -341,8 +343,6 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi) goto err_dummy_rx; } - mspi->pram = cpm_muram_addr(pram_ofs); - mspi->tx_bd = cpm_muram_addr(bds_ofs); mspi->rx_bd = cpm_muram_addr(bds_ofs + sizeof(*mspi->tx_bd)); @@ -370,7 +370,8 @@ err_dummy_rx: err_dummy_tx: cpm_muram_free(bds_ofs); err_bds: - cpm_muram_free(pram_ofs); + if (!(mspi->flags & SPI_CPM1)) + cpm_muram_free(cpm_muram_offset(mspi->pram)); err_pram: fsl_spi_free_dummy_rx(); return -ENOMEM; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] spi: fsl-spi: fix devm_ioremap_resource() error case
devm_ioremap_resource() doesn't return NULL but an ERR_PTR on error. Reported-by: Jonas Gorsky Signed-off-by: Christophe Leroy --- drivers/spi/spi-fsl-cpm.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/spi/spi-fsl-cpm.c b/drivers/spi/spi-fsl-cpm.c index 4e5c945..1e66644 100644 --- a/drivers/spi/spi-fsl-cpm.c +++ b/drivers/spi/spi-fsl-cpm.c @@ -305,10 +305,15 @@ int fsl_spi_cpm_init(struct mpc8xxx_spi *mspi) if (mspi->flags & SPI_CPM1) { struct resource *res; + void *pram; res = platform_get_resource(to_platform_device(dev), IORESOURCE_MEM, 1); - mspi->pram = devm_ioremap_resource(dev, res); + pram = devm_ioremap_resource(dev, res); + if (IS_ERR(pram)) + mspi->pram = NULL; + else + mspi->pram = pram; } else { unsigned long pram_ofs = fsl_spi_cpm_get_pram(mspi); -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: powerpc32: rearrange instructions order in ip_fast_csum()
Le 25/03/2015 02:22, Scott Wood a écrit : On Tue, Feb 03, 2015 at 12:39:27PM +0100, LEROY Christophe wrote: Signed-off-by: Christophe Leroy --- arch/powerpc/lib/checksum_32.S | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 6d67e05..5500704 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -26,13 +26,17 @@ _GLOBAL(ip_fast_csum) lwz r0,0(r3) lwzur5,4(r3) - addic. r4,r4,-2 + addic. r4,r4,-4 addcr0,r0,r5 mtctr r4 blelr- -1: lwzur4,4(r3) - adder0,r0,r4 + lwzur5,4(r3) + lwzur4,4(r3) The blelr is pointless since len is guaranteed to be >= 5 (assuming that comment is accurate), but now it's both pointless and in the wrong place, since you haven't yet finished the four words that you subtracted from r4. The blelr is just there to protect the function against negative value of r4 hence ctr. In any case, the returned result in that case in not correct, has we do not touch r3. How about keeping the blelr, without the -, moving it after the initial words, and changing the number of inital words to 5? We can't just do blelr, we would need to fold the result first. But indeed, this would be useless because I quickly checked and it seems that all functions calling ip_fast_csum() check that the length is not lower than 5. So I will just remove the blelr Also maybe do all the loads up front, since many PPC chips have a three cycle load latency rather than two. ok Christophe --- L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel antivirus Avast. http://www.avast.com ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 1/2] powerpc32: put csum_tcpudp_magic inline
Le 25/03/2015 03:10, Scott Wood a écrit : On Tue, 2015-02-03 at 12:39 +0100, Christophe Leroy wrote: csum_tcpudp_magic() is only a few instructions, and does not modifies any other register than the returned result. So it is not worth having it as a separate function and suffer function branching and saving of volatile registers. This patch makes it inline by use of the already existing csum_tcpudp_nofold() function. Signed-off-by: Christophe Leroy --- v2: no change arch/powerpc/include/asm/checksum.h | 15 +++ arch/powerpc/lib/checksum_32.S | 16 2 files changed, 15 insertions(+), 16 deletions(-) The 64-bit version is pretty similar to the 32-bit -- why only use csum_tcpudp_nofold() on 32-bit? I did it only on 32-bit because I have no way to test it on 64-bits, but I can do it for 64 bits as well, no problem. Christophe --- L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel antivirus Avast. http://www.avast.com ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [v2,2/2] powerpc32: add support for csum_add()
Le 25/03/2015 02:30, Scott Wood a écrit : On Tue, Feb 03, 2015 at 12:39:27PM +0100, LEROY Christophe wrote: The C version of csum_add() as defined in include/net/checksum.h gives the following assembly: 0: 7c 04 1a 14 add r0,r4,r3 4: 7c 64 00 10 subfc r3,r4,r0 8: 7c 63 19 10 subfe r3,r3,r3 c: 7c 63 00 50 subfr3,r3,r0 include/net/checksum.h also offers the possibility to define an arch specific function. This patch provides a ppc32 specific csum_add() inline function. What makes it 32-bit specific? As far as I understand, the 64-bit will do a 64 bit addition, so we will have to handle differently the carry, can't just be an addze like in 32-bit. The generated code is most likely different on ppc64. I have no ppc64 compiler so I can't check what gcc generates for the following code: |__wsumcsum_add(__wsum csum, __wsum addend) { u32 res= (__force u32)csum; res+= (__force u32)addend; return (__force __wsum)(res+ (res< (__force u32)addend)); }| Can someone with a ppc64 compiler tell what we get ? Christophe --- L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel antivirus Avast. http://www.avast.com ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 0/4] powerpc32: use cacheable alternatives of memcpy and memset
This patchset implements use of cacheable versions of memset and memcpy when the len is greater than the cacheline size and the destination is in RAM. On MPC885, we observe a 7% rate increase on FTP transfer Christophe Leroy (4): Partially revert "powerpc: Remove duplicate cacheable_memcpy/memzero functions" powerpc32: swap r4 and r5 in cacheable_memzero powerpc32: memset(0): use cacheable_memzero powerpc32: memcpy: use cacheable_memcpy arch/powerpc/lib/copy_32.S | 148 + 1 file changed, 148 insertions(+) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/4] Partially revert "powerpc: Remove duplicate cacheable_memcpy/memzero functions"
This partially reverts commit 'powerpc: Remove duplicate cacheable_memcpy/memzero functions ("f909a35bdfb7cb350d078a2cf888162eeb20381c")' Functions cacheable_memcpy/memzero are more efficient than memcpy/memset as they use the dcbz instruction which avoids refill of the cacheline with the data that we will overwrite. Signed-off-by: Christophe Leroy --- arch/powerpc/lib/copy_32.S | 127 + 1 file changed, 127 insertions(+) diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 6813f80..55f19f9 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -69,6 +69,54 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) +/* + * Use dcbz on the complete cache lines in the destination + * to set them to zero. This requires that the destination + * area is cacheable. -- paulus + */ +_GLOBAL(cacheable_memzero) + mr r5,r4 + li r4,0 + addir6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwur4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subfr6,r0,r6 + clrlwi r7,r6,32-LG_CACHELINE_BYTES + add r8,r7,r5 + srwir9,r8,LG_CACHELINE_BYTES + addic. r9,r9,-1/* total number of complete cachelines */ + ble 2f + xorir0,r7,CACHELINE_MASK & ~3 + srwi. r0,r0,2 + beq 3f + mtctr r0 +4: stwur4,4(r6) + bdnz4b +3: mtctr r9 + li r7,4 +10:dcbzr7,r6 + addir6,r6,CACHELINE_BYTES + bdnz10b + clrlwi r5,r8,32-LG_CACHELINE_BYTES + addir5,r5,4 +2: srwir0,r5,2 + mtctr r0 + bdz 6f +1: stwur4,4(r6) + bdnz1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addir6,r6,3 +8: stbur4,1(r6) + bdnz8b + blr + _GLOBAL(memset) rlwimi r4,r4,8,16,23 rlwimi r4,r4,16,0,15 @@ -94,6 +142,85 @@ _GLOBAL(memset) bdnz8b blr +/* + * This version uses dcbz on the complete cache lines in the + * destination area to reduce memory traffic. This requires that + * the destination area is cacheable. + * We only use this version if the source and dest don't overlap. + * -- paulus. + */ +_GLOBAL(cacheable_memcpy) + add r7,r3,r5/* test if the src & dst overlap */ + add r8,r4,r5 + cmplw 0,r4,r7 + cmplw 1,r3,r8 + crand 0,0,4 /* cr0.lt &= cr1.lt */ + blt memcpy /* if regions overlap */ + + addir4,r4,-4 + addir6,r3,-4 + neg r0,r3 + andi. r0,r0,CACHELINE_MASK/* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + subfr5,r0,r5 + mtctr r8 + beq+61f +70:lbz r9,4(r4)/* do some bytes */ + stb r9,4(r6) + addir4,r4,1 + addir6,r6,1 + bdnz70b +61:srwi. r0,r0,2 + mtctr r0 + beq 58f +72:lwzur9,4(r4)/* do some words */ + stwur9,4(r6) + bdnz72b + +58:srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + mtctr r0 + beq 63f +53: + dcbzr11,r6 + COPY_16_BYTES +#if L1_CACHE_BYTES >= 32 + COPY_16_BYTES +#if L1_CACHE_BYTES >= 64 + COPY_16_BYTES + COPY_16_BYTES +#if L1_CACHE_BYTES >= 128 + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES +#endif +#endif +#endif + bdnz53b + +63:srwi. r0,r5,2 + mtctr r0 + beq 64f +30:lwzur0,4(r4) + stwur0,4(r6) + bdnz30b + +64:andi. r0,r5,3 + mtctr r0 + beq+65f +40:lbz r0,4(r4) + stb r0,4(r6) + addir4,r4,1 + addir6,r6,1 + bdnz40b +65:blr + _GLOBAL(memmove) cmplw 0,r3,r4 bgt backwards_memcpy -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/4] powerpc32: swap r4 and r5 in cacheable_memzero
We swap r4 and r5, this avoids having to move the len contained in r4 into r5 Signed-off-by: Christophe Leroy --- arch/powerpc/lib/copy_32.S | 29 ++--- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 55f19f9..cbca76c 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -75,18 +75,17 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1) * area is cacheable. -- paulus */ _GLOBAL(cacheable_memzero) - mr r5,r4 - li r4,0 + li r5,0 addir6,r3,-4 - cmplwi 0,r5,4 + cmplwi 0,r4,4 blt 7f - stwur4,4(r6) + stwur5,4(r6) beqlr andi. r0,r6,3 - add r5,r0,r5 + add r4,r0,r4 subfr6,r0,r6 clrlwi r7,r6,32-LG_CACHELINE_BYTES - add r8,r7,r5 + add r8,r7,r4 srwir9,r8,LG_CACHELINE_BYTES addic. r9,r9,-1/* total number of complete cachelines */ ble 2f @@ -94,26 +93,26 @@ _GLOBAL(cacheable_memzero) srwi. r0,r0,2 beq 3f mtctr r0 -4: stwur4,4(r6) +4: stwur5,4(r6) bdnz4b 3: mtctr r9 li r7,4 10:dcbzr7,r6 addir6,r6,CACHELINE_BYTES bdnz10b - clrlwi r5,r8,32-LG_CACHELINE_BYTES - addir5,r5,4 -2: srwir0,r5,2 + clrlwi r4,r8,32-LG_CACHELINE_BYTES + addir4,r4,4 +2: srwir0,r4,2 mtctr r0 bdz 6f -1: stwur4,4(r6) +1: stwur5,4(r6) bdnz1b -6: andi. r5,r5,3 -7: cmpwi 0,r5,0 +6: andi. r4,r4,3 +7: cmpwi 0,r4,0 beqlr - mtctr r5 + mtctr r4 addir6,r6,3 -8: stbur4,1(r6) +8: stbur5,1(r6) bdnz8b blr -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 3/4] powerpc32: memset(0): use cacheable_memzero
cacheable_memzero uses dcbz instruction and is more efficient than memset(0) when the destination is in RAM This patch renames memset as generic_memset, and defines memset as a prolog to cacheable_memzero. This prolog checks if the byte to set is 0 and if the buffer is in RAM. If not, it falls back to generic_memcpy() Signed-off-by: Christophe Leroy --- arch/powerpc/lib/copy_32.S | 15 ++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index cbca76c..d8a9a86 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -12,6 +12,7 @@ #include #include #include +#include #define COPY_16_BYTES \ lwz r7,4(r4); \ @@ -74,6 +75,18 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1) * to set them to zero. This requires that the destination * area is cacheable. -- paulus */ +_GLOBAL(memset) + cmplwi r4,0 + bne-generic_memset + cmplwi r5,L1_CACHE_BYTES + blt-generic_memset + lis r8,max_pfn@ha + lwz r8,max_pfn@l(r8) + tophys (r9,r3) + srwir9,r9,PAGE_SHIFT + cmplw r9,r8 + bge-generic_memset + mr r4,r5 _GLOBAL(cacheable_memzero) li r5,0 addir6,r3,-4 @@ -116,7 +129,7 @@ _GLOBAL(cacheable_memzero) bdnz8b blr -_GLOBAL(memset) +_GLOBAL(generic_memset) rlwimi r4,r4,8,16,23 rlwimi r4,r4,16,0,15 addir6,r3,-4 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 4/4] powerpc32: memcpy: use cacheable_memcpy
cacheable_memcpy uses dcbz instruction and is more efficient than memcpy when the destination is in RAM This patch renames memcpy as generic_memcpy, and defines memcpy as a prolog to cacheable_memcpy. This prolog checks if the buffer is in RAM. If not, it falls back to generic_memcpy() On MPC885, we get approximatly 7% increase of the transfer rate on an FTP reception Signed-off-by: Christophe Leroy --- arch/powerpc/lib/copy_32.S | 23 --- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index d8a9a86..8f76d49 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -161,13 +161,27 @@ _GLOBAL(generic_memset) * We only use this version if the source and dest don't overlap. * -- paulus. */ +_GLOBAL(memmove) + cmplw 0,r3,r4 + bgt backwards_memcpy + /* fall through */ + +_GLOBAL(memcpy) + cmplwi r5,L1_CACHE_BYTES + blt-generic_memcpy + lis r8,max_pfn@ha + lwz r8,max_pfn@l(r8) + tophys (r9,r3) + srwir9,r9,PAGE_SHIFT + cmplw r9,r8 + bge-generic_memcpy _GLOBAL(cacheable_memcpy) add r7,r3,r5/* test if the src & dst overlap */ add r8,r4,r5 cmplw 0,r4,r7 cmplw 1,r3,r8 crand 0,0,4 /* cr0.lt &= cr1.lt */ - blt memcpy /* if regions overlap */ + blt generic_memcpy /* if regions overlap */ addir4,r4,-4 addir6,r3,-4 @@ -233,12 +247,7 @@ _GLOBAL(cacheable_memcpy) bdnz40b 65:blr -_GLOBAL(memmove) - cmplw 0,r3,r4 - bgt backwards_memcpy - /* fall through */ - -_GLOBAL(memcpy) +_GLOBAL(generic_memcpy) srwi. r7,r5,3 addir6,r3,-4 addir4,r4,-4 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 3/4] powerpc32: memset(0): use cacheable_memzero
Le 14/05/2015 02:55, Scott Wood a écrit : On Tue, 2015-05-12 at 15:32 +0200, Christophe Leroy wrote: cacheable_memzero uses dcbz instruction and is more efficient than memset(0) when the destination is in RAM This patch renames memset as generic_memset, and defines memset as a prolog to cacheable_memzero. This prolog checks if the byte to set is 0 and if the buffer is in RAM. If not, it falls back to generic_memcpy() Signed-off-by: Christophe Leroy --- arch/powerpc/lib/copy_32.S | 15 ++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index cbca76c..d8a9a86 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -12,6 +12,7 @@ #include #include #include +#include #define COPY_16_BYTES \ lwz r7,4(r4); \ @@ -74,6 +75,18 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1) * to set them to zero. This requires that the destination * area is cacheable. -- paulus */ +_GLOBAL(memset) + cmplwi r4,0 + bne-generic_memset + cmplwi r5,L1_CACHE_BYTES + blt-generic_memset + lis r8,max_pfn@ha + lwz r8,max_pfn@l(r8) + tophys (r9,r3) + srwir9,r9,PAGE_SHIFT + cmplw r9,r8 + bge-generic_memset + mr r4,r5 max_pfn includes highmem, and tophys only works on normal kernel addresses. Is there any other simple way to determine whether an address is in RAM or not ? I did that because of the below function from mm/mem.c |int page_is_ram(unsigned long pfn) { #ifndef CONFIG_PPC64/* XXX for now */ return pfn< max_pfn; #else unsigned long paddr= (pfn<< PAGE_SHIFT); struct memblock_region*reg; for_each_memblock(memory, reg) if (paddr>= reg->base&& paddr< (reg->base+ reg->size)) return 1; return 0; #endif } | If we were to point memset_io, memcpy_toio, etc. at noncacheable versions, are there any other callers left that can reasonably point at uncacheable memory? Do you mean we could just consider that memcpy() and memset() are called only with destination on RAM and thus we could avoid the check ? copy_tofrom_user() already does this assumption (allthought a user app could possibly provide a buffer located in an ALSA mapped IO area) Christophe --- L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel antivirus Avast. http://www.avast.com ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 1/4] Partially revert "powerpc: Remove duplicate cacheable_memcpy/memzero functions"
Le 14/05/2015 02:49, Scott Wood a écrit : On Tue, 2015-05-12 at 15:32 +0200, Christophe Leroy wrote: This partially reverts commit 'powerpc: Remove duplicate cacheable_memcpy/memzero functions ("f909a35bdfb7cb350d078a2cf888162eeb20381c")' I don't have that SHA. Do you mean b05ae4ee602b7dc90771408ccf0972e1b3801a35? Right, took it from the wrong tree sorry. Functions cacheable_memcpy/memzero are more efficient than memcpy/memset as they use the dcbz instruction which avoids refill of the cacheline with the data that we will overwrite. I don't see anything in this patchset that addresses the "NOTE: The old routines are just flat buggy on kernels that support hardware with different cacheline sizes" comment. I believe the NOTE means that if a kernel is compiled for several CPUs having different cache line size, then it will not work. But it is also the case of other functions using dcbz instruction, like copy_page() clear_page() copy_tofrom_user(). And indeed, this seems only possible in three cases: 1/ With CONFIG_44x as 47x has different size than 44x and 46x. However it is explicitly stated in arch/powerpc/platforms/44x/Kconfig : "config PPC_47x This option enables support for the 47x family of processors and is not currently compatible with other 44x or 46x varients" 2/ With CONFIG_PPC_85xx, as PPC_E500MC has different size than other E500. However it is explicitly stated in arch/powerpc/platforms/Kconfig.cputype : "config PPC_E500MC This must be enabled for running on e500mc (and derivatives such as e5500/e6500), and must be disabled for running on e500v1 or e500v2." 3/ With CONFIG_403GCX as 403GCX has different size than other 40x. However it seems to be no way to select CONFIG_403GCX from arch/powerpc/platforms/40x/Kconfig Christophe --- L'absence de virus dans ce courrier électronique a été vérifiée par le logiciel antivirus Avast. http://www.avast.com ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 0/6] powerpc32: replace memcpy and memset by cacheable alternatives
This patchset implements use of cacheable versions of memset and memcpy since when the destination is not cacheable, memset_io and memcpy_toio are used. On MPC885, we observe a 7% rate increase on FTP transfer Christophe Leroy (6): powerpc: use memset_io() to clear CPM Muram Partially revert "powerpc: Remove duplicate cacheable_memcpy/memzero functions" powerpc32: memset(0): use cacheable_memzero powerpc32: Merge the new memset() with the old one powerpc32: cacheable_memcpy becomes memcpy powerpc32: Few optimisations in memcpy arch/powerpc/lib/copy_32.S | 109 ++- arch/powerpc/sysdev/cpm_common.c | 2 +- 2 files changed, 109 insertions(+), 2 deletions(-) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 1/6] powerpc: use memset_io() to clear CPM Muram
CPM muram is not cached, so use memset_io() instead of memset() Signed-off-by: Christophe Leroy --- arch/powerpc/sysdev/cpm_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 4f78695..e2ea519 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -147,7 +147,7 @@ unsigned long cpm_muram_alloc(unsigned long size, unsigned long align) spin_lock_irqsave(&cpm_muram_lock, flags); cpm_muram_info.alignment = align; start = rh_alloc(&cpm_muram_info, size, "commproc"); - memset(cpm_muram_addr(start), 0, size); + memset_io(cpm_muram_addr(start), 0, size); spin_unlock_irqrestore(&cpm_muram_lock, flags); return start; -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 2/6] Partially revert "powerpc: Remove duplicate cacheable_memcpy/memzero functions"
This partially reverts commit 'powerpc: Remove duplicate cacheable_memcpy/memzero functions ("b05ae4ee602b7dc90771408ccf0972e1b3801a35")' Functions cacheable_memcpy/memzero are more efficient than memcpy/memset as they use the dcbz instruction which avoids refill of the cacheline with the data that we will overwrite. Signed-off-by: Christophe Leroy --- arch/powerpc/lib/copy_32.S | 127 + 1 file changed, 127 insertions(+) diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 6813f80..55f19f9 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -69,6 +69,54 @@ CACHELINE_BYTES = L1_CACHE_BYTES LG_CACHELINE_BYTES = L1_CACHE_SHIFT CACHELINE_MASK = (L1_CACHE_BYTES-1) +/* + * Use dcbz on the complete cache lines in the destination + * to set them to zero. This requires that the destination + * area is cacheable. -- paulus + */ +_GLOBAL(cacheable_memzero) + mr r5,r4 + li r4,0 + addir6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwur4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subfr6,r0,r6 + clrlwi r7,r6,32-LG_CACHELINE_BYTES + add r8,r7,r5 + srwir9,r8,LG_CACHELINE_BYTES + addic. r9,r9,-1/* total number of complete cachelines */ + ble 2f + xorir0,r7,CACHELINE_MASK & ~3 + srwi. r0,r0,2 + beq 3f + mtctr r0 +4: stwur4,4(r6) + bdnz4b +3: mtctr r9 + li r7,4 +10:dcbzr7,r6 + addir6,r6,CACHELINE_BYTES + bdnz10b + clrlwi r5,r8,32-LG_CACHELINE_BYTES + addir5,r5,4 +2: srwir0,r5,2 + mtctr r0 + bdz 6f +1: stwur4,4(r6) + bdnz1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addir6,r6,3 +8: stbur4,1(r6) + bdnz8b + blr + _GLOBAL(memset) rlwimi r4,r4,8,16,23 rlwimi r4,r4,16,0,15 @@ -94,6 +142,85 @@ _GLOBAL(memset) bdnz8b blr +/* + * This version uses dcbz on the complete cache lines in the + * destination area to reduce memory traffic. This requires that + * the destination area is cacheable. + * We only use this version if the source and dest don't overlap. + * -- paulus. + */ +_GLOBAL(cacheable_memcpy) + add r7,r3,r5/* test if the src & dst overlap */ + add r8,r4,r5 + cmplw 0,r4,r7 + cmplw 1,r3,r8 + crand 0,0,4 /* cr0.lt &= cr1.lt */ + blt memcpy /* if regions overlap */ + + addir4,r4,-4 + addir6,r3,-4 + neg r0,r3 + andi. r0,r0,CACHELINE_MASK/* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + subfr5,r0,r5 + mtctr r8 + beq+61f +70:lbz r9,4(r4)/* do some bytes */ + stb r9,4(r6) + addir4,r4,1 + addir6,r6,1 + bdnz70b +61:srwi. r0,r0,2 + mtctr r0 + beq 58f +72:lwzur9,4(r4)/* do some words */ + stwur9,4(r6) + bdnz72b + +58:srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + mtctr r0 + beq 63f +53: + dcbzr11,r6 + COPY_16_BYTES +#if L1_CACHE_BYTES >= 32 + COPY_16_BYTES +#if L1_CACHE_BYTES >= 64 + COPY_16_BYTES + COPY_16_BYTES +#if L1_CACHE_BYTES >= 128 + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES +#endif +#endif +#endif + bdnz53b + +63:srwi. r0,r5,2 + mtctr r0 + beq 64f +30:lwzur0,4(r4) + stwur0,4(r6) + bdnz30b + +64:andi. r0,r5,3 + mtctr r0 + beq+65f +40:lbz r0,4(r4) + stb r0,4(r6) + addir4,r4,1 + addir6,r6,1 + bdnz40b +65:blr + _GLOBAL(memmove) cmplw 0,r3,r4 bgt backwards_memcpy -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 3/6] powerpc32: memset(0): use cacheable_memzero
cacheable_memzero uses dcbz instruction and is more efficient than memset(0) when the destination is in RAM This patch renames memset as generic_memset, and defines memset as a prolog to cacheable_memzero. This prolog checks if the byte to set is 0. If not, it falls back to generic_memcpy() cacheable_memzero disappears as it is not referenced anywhere anymore Signed-off-by: Christophe Leroy --- arch/powerpc/lib/copy_32.S | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 55f19f9..0b4f954 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -74,9 +74,9 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1) * to set them to zero. This requires that the destination * area is cacheable. -- paulus */ -_GLOBAL(cacheable_memzero) - mr r5,r4 - li r4,0 +_GLOBAL(memset) + cmplwi r4,0 + bne-generic_memset addir6,r3,-4 cmplwi 0,r5,4 blt 7f @@ -117,7 +117,7 @@ _GLOBAL(cacheable_memzero) bdnz8b blr -_GLOBAL(memset) +_GLOBAL(generic_memset) rlwimi r4,r4,8,16,23 rlwimi r4,r4,16,0,15 addir6,r3,-4 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 4/6] powerpc32: Merge the new memset() with the old one
cacheable_memzero() which has become the new memset() and the old memset() are quite similar, so just merge them. Signed-off-by: Christophe Leroy --- arch/powerpc/lib/copy_32.S | 34 +++--- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 0b4f954..9262071 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -75,8 +75,9 @@ CACHELINE_MASK = (L1_CACHE_BYTES-1) * area is cacheable. -- paulus */ _GLOBAL(memset) - cmplwi r4,0 - bne-generic_memset + rlwimi r4,r4,8,16,23 + rlwimi r4,r4,16,0,15 + addir6,r3,-4 cmplwi 0,r5,4 blt 7f @@ -85,6 +86,9 @@ _GLOBAL(memset) andi. r0,r6,3 add r5,r0,r5 subfr6,r0,r6 + cmplwi 0,r4,0 + bne 2f /* Use normal procedure if r4 is not zero */ + clrlwi r7,r6,32-LG_CACHELINE_BYTES add r8,r7,r5 srwir9,r8,LG_CACHELINE_BYTES @@ -103,32 +107,8 @@ _GLOBAL(memset) bdnz10b clrlwi r5,r8,32-LG_CACHELINE_BYTES addir5,r5,4 -2: srwir0,r5,2 - mtctr r0 - bdz 6f -1: stwur4,4(r6) - bdnz1b -6: andi. r5,r5,3 -7: cmpwi 0,r5,0 - beqlr - mtctr r5 - addir6,r6,3 -8: stbur4,1(r6) - bdnz8b - blr -_GLOBAL(generic_memset) - rlwimi r4,r4,8,16,23 - rlwimi r4,r4,16,0,15 - addir6,r3,-4 - cmplwi 0,r5,4 - blt 7f - stwur4,4(r6) - beqlr - andi. r0,r6,3 - add r5,r0,r5 - subfr6,r0,r6 - srwir0,r5,2 +2: srwir0,r5,2 mtctr r0 bdz 6f 1: stwur4,4(r6) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 5/6] powerpc32: cacheable_memcpy becomes memcpy
cacheable_memcpy uses dcbz instruction and is more efficient than memcpy when the destination is in RAM. If the destination is in an io area, memcpy_toio() is normally used, not memcpy This patch renames memcpy as generic_memcpy, and renames cacheable_memcpy as memcpy On MPC885, we get approximatly 7% increase of the transfer rate on an FTP reception Signed-off-by: Christophe Leroy --- arch/powerpc/lib/copy_32.S | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 9262071..1d49c74 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -129,13 +129,18 @@ _GLOBAL(memset) * We only use this version if the source and dest don't overlap. * -- paulus. */ -_GLOBAL(cacheable_memcpy) +_GLOBAL(memmove) + cmplw 0,r3,r4 + bgt backwards_memcpy + /* fall through */ + +_GLOBAL(memcpy) add r7,r3,r5/* test if the src & dst overlap */ add r8,r4,r5 cmplw 0,r4,r7 cmplw 1,r3,r8 crand 0,0,4 /* cr0.lt &= cr1.lt */ - blt memcpy /* if regions overlap */ + blt generic_memcpy /* if regions overlap */ addir4,r4,-4 addir6,r3,-4 @@ -201,12 +206,7 @@ _GLOBAL(cacheable_memcpy) bdnz40b 65:blr -_GLOBAL(memmove) - cmplw 0,r3,r4 - bgt backwards_memcpy - /* fall through */ - -_GLOBAL(memcpy) +_GLOBAL(generic_memcpy) srwi. r7,r5,3 addir6,r3,-4 addir4,r4,-4 -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 6/6] powerpc32: Few optimisations in memcpy
This patch adds a few optimisations in memcpy functions by using lbzu/stbu instead of lxb/stb and by re-ordering insn inside a loop to reduce latency due to loading Signed-off-by: Christophe Leroy --- arch/powerpc/lib/copy_32.S | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 1d49c74..2ef50c6 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -155,9 +155,9 @@ _GLOBAL(memcpy) mtctr r8 beq+61f 70:lbz r9,4(r4)/* do some bytes */ - stb r9,4(r6) addir4,r4,1 addir6,r6,1 + stb r9,3(r6) bdnz70b 61:srwi. r0,r0,2 mtctr r0 @@ -199,10 +199,10 @@ _GLOBAL(memcpy) 64:andi. r0,r5,3 mtctr r0 beq+65f -40:lbz r0,4(r4) - stb r0,4(r6) - addir4,r4,1 - addir6,r6,1 + addir4,r4,3 + addir6,r6,3 +40:lbzur0,1(r4) + stbur0,1(r6) bdnz40b 65:blr -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 0/2] Optimise some IP checksum functions.
This patchset provides a few optimisations related to IP checksum functions. Christophe Leroy (2): powerpc: put csum_tcpudp_magic inline powerpc: add support for csum_add() arch/powerpc/include/asm/checksum.h | 37 - arch/powerpc/lib/checksum_32.S | 16 arch/powerpc/lib/checksum_64.S | 21 - 3 files changed, 28 insertions(+), 46 deletions(-) -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 1/2] powerpc: put csum_tcpudp_magic inline
csum_tcpudp_magic() is only a few instructions, and does modify really few registers. So it is not worth having it as a separate function and suffer function branching and saving of volatile registers. This patch makes it inline by use of the already existing csum_tcpudp_nofold() function. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/checksum.h | 21 - arch/powerpc/lib/checksum_32.S | 16 arch/powerpc/lib/checksum_64.S | 21 - 3 files changed, 12 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 8251a3b..5e43d2d 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -20,15 +20,6 @@ extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); /* - * computes the checksum of the TCP/UDP pseudo-header - * returns a 16-bit checksum, already complemented - */ -extern __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum); - -/* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) * @@ -127,6 +118,18 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, #endif } +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented + */ +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + unsigned short len, + unsigned short proto, + __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + #endif #endif /* __KERNEL__ */ #endif diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index e23a436..d6fab08 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -41,22 +41,6 @@ _GLOBAL(ip_fast_csum) blr /* - * Compute checksum of TCP or UDP pseudo-header: - * csum_tcpudp_magic(saddr, daddr, len, proto, sum) - */ -_GLOBAL(csum_tcpudp_magic) - rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ - addcr0,r3,r4/* add 4 32-bit words together */ - adder0,r0,r5 - adder0,r0,r7 - addze r0,r0 /* add in final carry */ - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwir3,r3,16 - blr - -/* * computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit) * diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 57a0720..f3ef354 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -45,27 +45,6 @@ _GLOBAL(ip_fast_csum) blr /* - * Compute checksum of TCP or UDP pseudo-header: - * csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum) - * No real gain trying to do this specially for 64 bit, but - * the 32 bit addition may spill into the upper bits of - * the doubleword so we still must fold it down from 64. - */ -_GLOBAL(csum_tcpudp_magic) - rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ - addcr0,r3,r4/* add 4 32-bit words together */ - adder0,r0,r5 - adder0,r0,r7 -rldicl r4,r0,32,0 /* fold 64 bit value */ -add r0,r4,r0 -srdir0,r0,32 - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwir3,r3,16 - blr - -/* * Computes the checksum of a memory block at buff, length len, * and adds in "sum" (32-bit). * -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 2/2] powerpc: add support for csum_add()
The C version of csum_add() as defined in include/net/checksum.h gives the following assembly in ppc32: 0: 7c 04 1a 14 add r0,r4,r3 4: 7c 64 00 10 subfc r3,r4,r0 8: 7c 63 19 10 subfe r3,r3,r3 c: 7c 63 00 50 subfr3,r3,r0 and the following in ppc64: 0xc0001af8 <+0>: add r3,r3,r4 0xc0001afc <+4>: cmplw cr7,r3,r4 0xc0001b00 <+8>: mfcrr4 0xc0001b04 <+12>:rlwinm r4,r4,29,31,31 0xc0001b08 <+16>:add r3,r4,r3 0xc0001b0c <+20>:clrldi r3,r3,32 0xc0001b10 <+24>:blr include/net/checksum.h also offers the possibility to define an arch specific function. This patch provides a specific csum_add() inline function. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/checksum.h | 16 1 file changed, 16 insertions(+) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 5e43d2d..e8d9ef4 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -130,6 +130,22 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } +#define HAVE_ARCH_CSUM_ADD +static inline __wsum csum_add(__wsum csum, __wsum addend) +{ +#ifdef __powerpc64__ + u64 res = (__force u64)csum; + + res += (__force u64)addend; + return (__force __wsum)((u32)res + (res >> 32)); +#else + asm("addc %0,%0,%1;" + "addze %0,%0;" + : "+r" (csum) : "r" (addend)); + return csum; +#endif +} + #endif #endif /* __KERNEL__ */ #endif -- 2.1.0 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 00/19] powerpc/8xx: Optimise MMU TLB handling and add support of 16k pages
This patchset: 1) provides several MMU TLB handling optimisation on MPC8xx. 2) adds support of 16k pages on MPC8xx. All changes have been successfully tested on a custom board equipped with MPC885 The two differences with first version of the patch are: 1) I removed the patch number 10, which was implementing a 16 bit alignment of the PGDIR. It is not worth potentially wasting up to 64k of memory just for removing one instruction (ori). 2) I managed to preserve r11 while calculating the level 2 address, therefore no more need to save r11 into CR. Signed-off-by: Christophe Leroy Tested-by: Christophe Leroy arch/powerpc/Kconfig |2 +- arch/powerpc/include/asm/mmu-8xx.h |2 + arch/powerpc/include/asm/pgtable-ppc32.h | 21 ++ arch/powerpc/include/asm/pte-8xx.h |7 +- arch/powerpc/include/asm/reg.h |3 +- arch/powerpc/kernel/head_8xx.S | 342 +++- 6 files changed, 187 insertions(+), 190 deletions(-) ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 01/19] powerpc/8xx: Declare SPRG2 as a SCRATCH register
Since coming 469d62be9263b92f2c3329540cbb1c076111f4f3, SPRG2 is used as a scratch register just like SPRG0 and SPRG1. So Declare it as such and fix the comment which is not valid anymore since that commit. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/reg.h |3 ++- arch/powerpc/kernel/head_8xx.S | 10 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index cb9c174..b6a7d62 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -888,7 +888,7 @@ * 32-bit 8xx: * - SPRG0 scratch for exception vectors * - SPRG1 scratch for exception vectors - * - SPRG2 apparently unused but initialized + * - SPRG2 scratch for exception vectors * */ #ifdef CONFIG_PPC64 @@ -994,6 +994,7 @@ #ifdef CONFIG_8xx #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 +#define SPRN_SPRG_SCRATCH2 SPRN_SPRG2 #endif diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 55d12fb..1329c5a 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -301,7 +301,7 @@ InstructionTLBMiss: stw r11, 4(r0) #else mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG2, r11 + mtspr SPRN_SPRG_SCRATCH2, r11 #endif mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 @@ -363,7 +363,7 @@ InstructionTLBMiss: mfspr r10, SPRN_DAR mtcrr10 mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 + mfspr r11, SPRN_SPRG_SCRATCH2 #else lwz r11, 0(r0) mtcrr11 @@ -386,7 +386,7 @@ InstructionTLBMiss: mtcrr10 li r11, 0x00f0 mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 + mfspr r11, SPRN_SPRG_SCRATCH2 #else lwz r11, 0(r0) mtcrr11 @@ -409,7 +409,7 @@ DataStoreTLBMiss: stw r11, 4(r0) #else mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG2, r11 + mtspr SPRN_SPRG_SCRATCH2, r11 #endif mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ @@ -487,7 +487,7 @@ DataStoreTLBMiss: mfspr r10, SPRN_DAR mtcrr10 mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG2 + mfspr r11, SPRN_SPRG_SCRATCH2 #else mtspr SPRN_DAR, r11 /* Tag DAR */ lwz r11, 0(r0) -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 02/19] powerpc/8xx: Use SCRATCH0 and SCRATCH1 also for TLB handlers
SCRATCH0 and SCRATCH1 are only used in Exceptions prologs where no other exception can happen. There is therefore no need to preserve them accross TLB handlers, we can use them there as in other exceptions. One of the advantages is that they do not suffer CPU6 errata unlike M_TW register. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 104 -- 1 files changed, 36 insertions(+), 68 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 1329c5a..3af6db1 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -104,12 +104,15 @@ turn_on_mmu: * task's thread_struct. */ #define EXCEPTION_PROLOG \ - mtspr SPRN_SPRG_SCRATCH0,r10; \ - mtspr SPRN_SPRG_SCRATCH1,r11; \ - mfcrr10;\ + EXCEPTION_PROLOG_0; \ EXCEPTION_PROLOG_1; \ EXCEPTION_PROLOG_2 +#define EXCEPTION_PROLOG_0 \ + mtspr SPRN_SPRG_SCRATCH0,r10; \ + mtspr SPRN_SPRG_SCRATCH1,r11; \ + mfcrr10 + #define EXCEPTION_PROLOG_1 \ mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ andi. r11,r11,MSR_PR; \ @@ -145,6 +148,14 @@ turn_on_mmu: SAVE_2GPRS(7, r11) /* + * Exception exit code. + */ +#define EXCEPTION_EPILOG_0 \ + mtcrr10;\ + mfspr r10,SPRN_SPRG_SCRATCH0; \ + mfspr r11,SPRN_SPRG_SCRATCH1 + +/* * Note: code which follows this uses cr0.eq (set if from kernel), * r11, r12 (SRR0), and r9 (SRR1). * @@ -293,16 +304,8 @@ InstructionTLBMiss: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcrr10 -#ifdef CONFIG_8xx_CPU6 - stw r10, 0(r0) - stw r11, 4(r0) -#else - mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG_SCRATCH2, r11 -#endif + EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 addir11, r10, 0x1000 @@ -359,18 +362,11 @@ InstructionTLBMiss: mtspr SPRN_MI_RPN, r10/* Update TLB entry */ /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcrr10 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG_SCRATCH2 -#else - lwz r11, 0(r0) - mtcrr11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 rfi 2: mfspr r11, SPRN_SRR1 @@ -381,19 +377,11 @@ InstructionTLBMiss: mtspr SPRN_SRR1, r11 /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcrr10 - li r11, 0x00f0 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG_SCRATCH2 -#else - lwz r11, 0(r0) - mtcrr11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 b InstructionAccess . = 0x1200 @@ -401,16 +389,8 @@ DataStoreTLBMiss: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcrr10 -#ifdef CONFIG_8xx_CPU6 - stw r10, 0(r0) - stw r11, 4(r0) -#else - mtspr SPRN_DAR, r10 - mtspr SPRN_SPRG_SCRATCH2, r11 -#endif + EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ /* If we are faulting a kernel address, we have to use the @@ -483,19 +463,12 @@ DataStoreTLBMiss: mtspr SPRN_MD_RPN, r10/* Update TLB entry */ /* Restore registers */ -#ifndef CONFIG_8xx_CPU6 - mfspr r10, SPRN_DAR - mtcrr10 - mtspr SPRN_DAR, r11 /* Tag DAR */ - mfspr r11, SPRN_SPRG_SCRATCH2 -#else - mtspr SPRN_DAR, r11 /* Tag DAR */ - lwz r11, 0(r0) - mtcrr11 - lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif - mfspr r10, SPRN_M_TW + mtspr SPRN_DAR, r11 /* Tag DAR */ + mfspr r10, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 rfi /* This is an instruction TLB error on the MPC8xx. This could be due @@ -519,23 +492,18 @@ DataTLBError: #ifdef CONFIG_8xx_CPU6 stw r3, 8(r0) #endif - DO_8xx_CPU6(0x3f80, r3) - mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ - mfcrr10 - stw r10, 0(r0) - stw r11, 4(r0) + EXCEPTION_PROLOG_0 + mtspr SPRN_SPRG_SCRATCH2
[PATCH v2 03/19] powerpc/8xx: exception InstructionAccess does not exist on MPC8xx
Exception InstructionAccess does not exist on MPC8xx. No need to branch there from somewhere else. Handling can be done directly in InstructionTLBError Exception. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 17 +++-- 1 files changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 3af6db1..fbe5d10 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -234,15 +234,9 @@ DataAccess: EXC_XFER_LITE(0x300, handle_page_fault) /* Instruction access exception. - * This is "never generated" by the MPC8xx. We jump to it for other - * translation errors. + * This is "never generated" by the MPC8xx. */ - . = 0x400 -InstructionAccess: - EXCEPTION_PROLOG - mr r4,r12 - mr r5,r9 - EXC_XFER_LITE(0x400, handle_page_fault) + EXCEPTION(0x400, InstructionAccess, unknown_exception, EXC_XFER_STD) /* External interrupt */ EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) @@ -382,7 +376,7 @@ InstructionTLBMiss: #endif mfspr r10, SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 - b InstructionAccess + b InstructionTLBError . = 0x1200 DataStoreTLBMiss: @@ -477,7 +471,10 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: - b InstructionAccess + EXCEPTION_PROLOG + mr r4,r12 + mr r5,r9 + EXC_XFER_LITE(0x1300, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to * many reasons, including a dirty update to a pte. We can catch that -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 05/19] powerpc/8xx: Fix comment about DIRTY update
Since commit 2321f33790a6c5b80322d907a92d5739e7521a13, dirty handling is not handled here anymore. So we fix the comment. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S |8 ++-- 1 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index e59e39e..171c6ef 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -477,12 +477,8 @@ InstructionTLBError: EXC_XFER_LITE(0x1300, handle_page_fault) /* This is the data TLB error on the MPC8xx. This could be due to - * many reasons, including a dirty update to a pte. We can catch that - * one here, but anything else is an error. First, we track down the - * Linux pte. If it is valid, write access is allowed, but the - * page dirty bit is not set, we will set it and reload the TLB. For - * any other case, we bail out to a higher level function that can - * handle it. + * many reasons, including a dirty update to a pte. We bail out to + * a higher level function that can handle it. */ . = 0x1400 DataTLBError: -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 04/19] powerpc/8xx: Remove loading of r10 at end of FixupDAR
Since commit 2321f33790a6c5b80322d907a92d5739e7521a13, r10 is not used anymore after FixupDAR. There is therefore no need to set it up with the value of DAR. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S |7 +++ 1 files changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index fbe5d10..e59e39e 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -495,7 +495,7 @@ DataTLBError: mfspr r10, SPRN_DAR cmpwi cr0, r10, 0x00f0 beq-FixupDAR/* must be a buggy dcbX, icbi insn. */ -DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR */ +DARFixed:/* Return from dcbx instruction bug workaround */ #ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) #endif @@ -524,7 +524,7 @@ DARFixed:/* Return from dcbx instruction bug workaround, r10 holds value of DAR /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions * by decoding the registers used by the dcbx instruction and adding them. - * DAR is set to the calculated address and r10 also holds the EA on exit. + * DAR is set to the calculated address. */ /* define if you don't want to use self modifying code */ #define NO_SELF_MODIFYING_CODE @@ -564,8 +564,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq+142f cmpwi cr0, r10, 1964 /* Is icbi? */ beq+142f -141: mfspr r10, SPRN_DAR /* r10 must hold DAR at exit */ - b DARFixed/* Nope, go back to normal TLB processing */ +141: b DARFixed/* Nope, go back to normal TLB processing */ 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 10/19] powerpc/8xx: Duplicate two insns instead of branching
Branching takes two cycles on MPC8xx. Lets duplicate the two instructions and avoid the branching. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S |6 -- 1 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 5037420..4a49ff3 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -638,9 +638,11 @@ modified_instr: /* special handling for r10,r11 since these are modified already */ 153: mfspr r11, SPRN_SPRG_SCRATCH1 /* load r11 from SPRN_SPRG_SCRATCH1 */ - b 155f + add r10, r10, r11 /* add it */ + mfctr r11 /* restore r11 */ + b 151b 154: mfspr r11, SPRN_SPRG_SCRATCH0 /* load r10 from SPRN_SPRG_SCRATCH0 */ -155: add r10, r10, r11 /* add it */ + add r10, r10, r11 /* add it */ mfctr r11 /* restore r11 */ b 151b #endif -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 06/19] powerpc/8xx: No need to save r10 and r3 when not calling FixupDAR
r10 and r3 are only used inside FixupDAR function. So lets save them inside that function only. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 27 +-- 1 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 171c6ef..845abf8 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -482,20 +482,12 @@ InstructionTLBError: */ . = 0x1400 DataTLBError: -#ifdef CONFIG_8xx_CPU6 - stw r3, 8(r0) -#endif EXCEPTION_PROLOG_0 - mtspr SPRN_SPRG_SCRATCH2, r10 - mfspr r10, SPRN_DAR - cmpwi cr0, r10, 0x00f0 + mfspr r11, SPRN_DAR + cmpwi cr0, r11, 0x00f0 beq-FixupDAR/* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ -#ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) -#endif - mfspr r10,SPRN_SPRG_SCRATCH2 EXCEPTION_EPILOG_0 b DataAccess @@ -525,6 +517,10 @@ DARFixed:/* Return from dcbx instruction bug workaround */ /* define if you don't want to use self modifying code */ #define NO_SELF_MODIFYING_CODE FixupDAR:/* Entry point for dcbx workaround. */ +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000/* Address >= 0x8000 */ @@ -540,6 +536,9 @@ FixupDAR:/* Entry point for dcbx workaround. */ mtspr SPRN_MD_TWC, r11/* Load pte table base address */ mfspr r11, SPRN_MD_TWC/* and get the pte address */ lwz r11, 0(r11) /* Get the pte */ +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) /* restore r3 from memory */ +#endif /* concat physical page address(r11) and page offset(r10) */ rlwimi r11, r10, 0, 20, 31 lwz r11,0(r11) @@ -560,15 +559,13 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq+142f cmpwi cr0, r10, 1964 /* Is icbi? */ beq+142f -141: b DARFixed/* Nope, go back to normal TLB processing */ +141: mfspr r10,SPRN_SPRG_SCRATCH2 + b DARFixed/* Nope, go back to normal TLB processing */ 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 142: /* continue, it was a dcbx, dcbi instruction. */ -#ifdef CONFIG_8xx_CPU6 - lwz r3, 8(r0) /* restore r3 from memory */ -#endif #ifndef NO_SELF_MODIFYING_CODE andis. r10,r11,0x1f/* test if reg RA is r0 */ li r10,modified_instr@l @@ -587,6 +584,7 @@ modified_instr: bne+143f subfr10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */ 143: mtdar r10 /* store faulting EA in DAR */ + mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed/* Go back to normal TLB handling */ #else mfctr r10 @@ -640,6 +638,7 @@ modified_instr: mfdar r11 mtctr r11 /* restore ctr reg from DAR */ mtdar r10 /* save fault EA to DAR */ + mfspr r10,SPRN_SPRG_SCRATCH2 b DARFixed/* Go back to normal TLB handling */ /* special handling for r10,r11 since these are modified already */ -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 15/19] powerpc/8xx: Implement 16k pages
This patch activates the handling of 16k pages on the MPC8xx. Signed-off-by: Christophe Leroy --- arch/powerpc/Kconfig |2 +- arch/powerpc/include/asm/mmu-8xx.h |2 ++ arch/powerpc/kernel/head_8xx.S |4 3 files changed, 7 insertions(+), 1 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 5f44d3b..dc5f64e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -518,7 +518,7 @@ config PPC_4K_PAGES bool "4k page size" config PPC_16K_PAGES - bool "16k page size" if 44x + bool "16k page size" if 44x || PPC_8xx config PPC_64K_PAGES bool "64k page size" if 44x || PPC_STD_MMU_64 || PPC_BOOK3E_64 diff --git a/arch/powerpc/include/asm/mmu-8xx.h b/arch/powerpc/include/asm/mmu-8xx.h index 3d11d3c..986b9e1 100644 --- a/arch/powerpc/include/asm/mmu-8xx.h +++ b/arch/powerpc/include/asm/mmu-8xx.h @@ -56,6 +56,7 @@ * additional information from the MI_EPN, and MI_TWC registers. */ #define SPRN_MI_RPN790 +#define MI_SPS16K 0x0008 /* Small page size (0 = 4k, 1 = 16k) */ /* Define an RPN value for mapping kernel memory to large virtual * pages for boot initialization. This has real page number of 0, @@ -129,6 +130,7 @@ * additional information from the MD_EPN, and MD_TWC registers. */ #define SPRN_MD_RPN798 +#define MD_SPS16K 0x0008 /* Small page size (0 = 4k, 1 = 16k) */ /* This is a temporary storage register that could be used to save * a processor working register during a tablewalk. diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 8966262..4dd6be0 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -45,7 +45,11 @@ * Value for the bits that have fixed value in RPN entries. * Also used for tagging DAR for DTLBerror. */ +#ifdef CONFIG_PPC_16K_PAGES +#define RPN_PATTERN(0x00f0 | MD_SPS16K) +#else #define RPN_PATTERN0x00f0 +#endif __HEAD _ENTRY(_stext); -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 18/19] powerpc/8xx: _PMD_PRESENT already set in level 1 entries
When a PMD entry is valid, _PMD_PRESENT is set. Therefore, forcing that bit during TLB loading is useless. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S |2 -- 1 files changed, 0 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 48d3de8..bb7c816 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -340,7 +340,6 @@ InstructionTLBMiss: /* We have a pte table, so load the MI_TWC with the attributes * for this "segment." */ - ori r11,r11,1 /* Set valid bit */ MTSPR_CPU6(SPRN_MI_TWC, r11, r3)/* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ /* Extract level 2 index */ @@ -417,7 +416,6 @@ DataStoreTLBMiss: rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ - ori r11, r11, 1 /* Set valid bit in physical L2 page */ /* Insert the Guarded flag into the TWC from the Linux PTE. * It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 09/19] powerpc/8xx: Optimize verification in FixupDAR
By XORing the upper part of the instruction code, we get a value that can directly be verified with the second test and we can remove the first test. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S |6 ++ 1 files changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index e5a250c..5037420 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -542,10 +542,8 @@ FixupDAR:/* Entry point for dcbx workaround. */ /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, * no need to include them here */ - srwir10, r11, 26/* check if major OP code is 31 */ - cmpwi cr0, r10, 31 - bne-141f - rlwinm r10, r11, 0, 21, 30 + xoris r10, r11, 0x7c00/* check if major OP code is 31 */ + rlwinm r10, r10, 0, 21, 5 cmpwi cr0, r10, 2028 /* Is dcbz? */ beq+142f cmpwi cr0, r10, 940 /* Is dcbi? */ -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 19/19] powerpc/8xx: Don't restore regs to save them again.
There is not need to restore r10, r11 and cr registers at this end of ITLBmiss handler as they are saved again to the same place in ITLBError handler we are jumping to. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S |8 +--- 1 files changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index bb7c816..e21f0b2 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -381,8 +381,7 @@ InstructionTLBMiss: lwz r3, 8(r0) #endif mfspr r10, SPRN_SPRG_SCRATCH2 - EXCEPTION_EPILOG_0 - b InstructionTLBError + b InstructionTLBError1 . = 0x1200 DataStoreTLBMiss: @@ -471,7 +470,10 @@ DataStoreTLBMiss: */ . = 0x1300 InstructionTLBError: - EXCEPTION_PROLOG + EXCEPTION_PROLOG_0 +InstructionTLBError1: + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 mr r4,r12 mr r5,r9 EXC_XFER_LITE(0x1300, handle_page_fault) -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 17/19] powerpc/8xx: set PTE bit 22 off TLBmiss
No need to re-set this bit at each TLB miss. Let's set it in the PTE. Signed-off-by: Christophe Leroy --- arch/powerpc/include/asm/pgtable-ppc32.h | 21 + arch/powerpc/include/asm/pte-8xx.h |7 +-- arch/powerpc/kernel/head_8xx.S | 10 ++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h index 47edde8..c261792 100644 --- a/arch/powerpc/include/asm/pgtable-ppc32.h +++ b/arch/powerpc/include/asm/pgtable-ppc32.h @@ -172,6 +172,26 @@ static inline unsigned long pte_update(pte_t *p, #ifdef PTE_ATOMIC_UPDATES unsigned long old, tmp; +#ifdef CONFIG_PPC_8xx + unsigned long tmp2; + + __asm__ __volatile__("\ +1: lwarx %0,0,%4\n\ + andc%1,%0,%5\n\ + or %1,%1,%6\n\ + /* 0x200 == Extended encoding, bit 22 */ \ + /* Bit 22 has to be 1 if neither _PAGE_USER nor _PAGE_RW are set */ \ + rlwimi %1,%1,32-2,0x200\n /* get _PAGE_USER */ \ + rlwinm %3,%1,32-1,0x200\n /* get _PAGE_RW */ \ + or %1,%3,%1\n\ + xori%1,%1,0x200\n" + PPC405_ERR77(0,%4) +" stwcx. %1,0,%4\n\ + bne-1b" + : "=&r" (old), "=&r" (tmp), "=m" (*p), "=&r" (tmp2) + : "r" (p), "r" (clr), "r" (set), "m" (*p) + : "cc" ); +#else /* CONFIG_PPC_8xx */ __asm__ __volatile__("\ 1: lwarx %0,0,%3\n\ andc%1,%0,%4\n\ @@ -182,6 +202,7 @@ static inline unsigned long pte_update(pte_t *p, : "=&r" (old), "=&r" (tmp), "=m" (*p) : "r" (p), "r" (clr), "r" (set), "m" (*p) : "cc" ); +#endif /* CONFIG_PPC_8xx */ #else /* PTE_ATOMIC_UPDATES */ unsigned long old = pte_val(*p); *p = __pte((old & ~clr) | set); diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h index d44826e..dede1e7 100644 --- a/arch/powerpc/include/asm/pte-8xx.h +++ b/arch/powerpc/include/asm/pte-8xx.h @@ -48,19 +48,22 @@ */ #define _PAGE_RW 0x0400 /* lsb PP bits, inverted in HW */ #define _PAGE_USER 0x0800 /* msb PP bits */ +/* set when neither _PAGE_USER nor _PAGE_RW are set */ +#define _PAGE_KNLRO0x0200 #define _PMD_PRESENT 0x0001 #define _PMD_BAD 0x0ff0 #define _PMD_PAGE_MASK 0x000c #define _PMD_PAGE_8M 0x000c -#define _PTE_NONE_MASK _PAGE_ACCESSED +#define _PTE_NONE_MASK (_PAGE_ACCESSED | _PAGE_KNLRO) /* Until my rework is finished, 8xx still needs atomic PTE updates */ #define PTE_ATOMIC_UPDATES 1 /* We need to add _PAGE_SHARED to kernel pages */ -#define _PAGE_KERNEL_RO(_PAGE_SHARED) +#define _PAGE_KERNEL_RO(_PAGE_SHARED | _PAGE_KNLRO) +#define _PAGE_KERNEL_ROX (_PAGE_EXEC | _PAGE_KNLRO) #define _PAGE_KERNEL_RW(_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE) #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index a7af26e..48d3de8 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -445,14 +445,8 @@ DataStoreTLBMiss: and r11, r11, r10 rlwimi r10, r11, 0, _PAGE_PRESENT #endif - /* Honour kernel RO, User NA */ - /* 0x200 == Extended encoding, bit 22 */ - rlwimi r10, r10, 32-2, 0x200 /* Copy USER to bit 22, 0x200 */ - /* r11 = (r10 & _PAGE_RW) >> 1 */ - rlwinm r11, r10, 32-1, 0x200 - or r10, r11, r10 - /* invert RW and 0x200 bits */ - xorir10, r10, _PAGE_RW | 0x200 + /* invert RW */ + xorir10, r10, _PAGE_RW /* The Linux PTE won't go exactly into the MMU TLB. * Software indicator bits 22 and 28 must be clear. -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 12/19] powerpc/8xx: Don't use MD_TWC for walk
MD_TWC can only be used properly with 4k pages. So lets calculate level 2 table index by ourselves. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 28 1 files changed, 12 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index ad15070..0f571f5 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -297,8 +297,6 @@ InstructionTLBMiss: addir11, r10, -0x1000 tlbie r11 #endif - DO_8xx_CPU6(0x3780, r3) - mtspr SPRN_MD_EPN, r10/* Have to use MD_EPN for walk, MI_EPN can't */ /* If we are faulting a kernel address, we have to use the * kernel page tables. @@ -326,10 +324,9 @@ InstructionTLBMiss: ori r11,r11,1 /* Set valid bit */ DO_8xx_CPU6(0x2b80, r3) mtspr SPRN_MI_TWC, r11/* Set segment attributes */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11/* Load pte table base address */ - mfspr r11, SPRN_MD_TWC/* and get the pte address */ - lwz r10, 0(r11) /* Get the pte */ + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + rlwinm r11, r11, 22, 20, 29/* Extract level 2 index */ + lwzxr10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP andi. r11, r10, _PAGE_ACCESSED | _PAGE_PRESENT @@ -395,12 +392,13 @@ DataStoreTLBMiss: /* We have a pte table, so load fetch the pte from the table. */ - ori r11, r11, 1 /* Set valid bit in physical L2 page */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11/* Load pte table base address */ - mfspr r10, SPRN_MD_TWC/* and get the pte address */ + mfspr r10, SPRN_MD_EPN/* Get address of fault */ + /* Extract level 2 index */ + rlwinm r10, r10, 22, 20, 29 + rlwimi r10, r11, 0, 0, 19 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ + ori r11, r11, 1 /* Set valid bit in physical L2 page */ /* Insert the Guarded flag into the TWC from the Linux PTE. * It is bit 27 of both the Linux PTE and the TWC (at least * I got that right :-). It will be better when we can put @@ -524,18 +522,16 @@ FixupDAR:/* Entry point for dcbx workaround. */ /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 andis. r11, r10, 0x8000/* Address >= 0x8000 */ - DO_8xx_CPU6(0x3780, r3) - mtspr SPRN_MD_EPN, r10 mfspr r11, SPRN_M_TW /* Get level 1 table base address */ beq-3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: rlwinm r10, r10, 12, 20, 29/* Extract level 1 index */ lwzxr11, r10, r11 /* Get the level 1 entry */ - DO_8xx_CPU6(0x3b80, r3) - mtspr SPRN_MD_TWC, r11/* Load pte table base address */ - mfspr r11, SPRN_MD_TWC/* and get the pte address */ - lwz r11, 0(r11) /* Get the pte */ + rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ + mfspr r11, SPRN_SRR0 /* Get effective address of fault */ + rlwinm r11, r11, 22, 20, 29/* Extract level 2 index */ + lwzxr11, r10, r11 /* Get the pte */ #ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) /* restore r3 from memory */ #endif -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 08/19] powerpc/8xx: No need to restore registers and save them again.
In DTLBError handler there is not need to restore r10, r11 and cr registers after fixing DAR as they are saved again to the same place just after. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 5f04d5f..e5a250c 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -478,8 +478,8 @@ DataTLBError: cmpwi cr0, r11, 0x00f0 beq-FixupDAR/* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ - EXCEPTION_EPILOG_0 - EXCEPTION_PROLOG + EXCEPTION_PROLOG_1 + EXCEPTION_PROLOG_2 mfspr r10,SPRN_DSISR stw r10,_DSISR(r11) mr r5,r10 -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 07/19] powerpc/8xx: DataAccess exception not generated by MPC8xx
DataAccess exception is never generated by MPC8xx so do the job directly where it is used to avoid an unnecessary branching. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 23 ++- 1 files changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 845abf8..5f04d5f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -219,19 +219,9 @@ MachineCheck: EXC_XFER_STD(0x200, machine_check_exception) /* Data access exception. - * This is "never generated" by the MPC8xx. We jump to it for other - * translation errors. + * This is "never generated" by the MPC8xx. */ - . = 0x300 -DataAccess: - EXCEPTION_PROLOG - mfspr r10,SPRN_DSISR - stw r10,_DSISR(r11) - mr r5,r10 - mfspr r4,SPRN_DAR - li r10,0x00f0 - mtspr SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */ - EXC_XFER_LITE(0x300, handle_page_fault) + EXCEPTION(0x300, DataAccess, unknown_exception, EXC_XFER_STD) /* Instruction access exception. * This is "never generated" by the MPC8xx. @@ -489,7 +479,14 @@ DataTLBError: beq-FixupDAR/* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_EPILOG_0 - b DataAccess + EXCEPTION_PROLOG + mfspr r10,SPRN_DSISR + stw r10,_DSISR(r11) + mr r5,r10 + mfspr r4,SPRN_DAR + li r10,0x00f0 + mtspr SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */ + EXC_XFER_LITE(0x1400, handle_page_fault) EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE) EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE) -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 13/19] powerpc/8xx: Use PAGE size related consts
For PAGE size related operations, use PAGE size consts in order to be able to use different page size in the futur. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 30 ++ 1 files changed, 18 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 0f571f5..dcaee9f 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -292,9 +292,9 @@ InstructionTLBMiss: mtspr SPRN_SPRG_SCRATCH2, r10 mfspr r10, SPRN_SRR0 /* Get effective address of fault */ #ifdef CONFIG_8xx_CPU15 - addir11, r10, 0x1000 + addir11, r10, PAGE_SIZE tlbie r11 - addir11, r10, -0x1000 + addir11, r10, -PAGE_SIZE tlbie r11 #endif @@ -313,7 +313,8 @@ InstructionTLBMiss: ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: #endif - rlwinm r10, r10, 12, 20, 29/* Extract level 1 index */ + /* Extract level 1 index */ + rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzxr11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -325,7 +326,8 @@ InstructionTLBMiss: DO_8xx_CPU6(0x2b80, r3) mtspr SPRN_MI_TWC, r11/* Set segment attributes */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ - rlwinm r11, r11, 22, 20, 29/* Extract level 2 index */ + /* Extract level 2 index */ + rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzxr10, r10, r11 /* Get the pte */ #ifdef CONFIG_SWAP @@ -385,7 +387,8 @@ DataStoreTLBMiss: lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l 3: - rlwinm r10, r10, 12, 20, 29/* Extract level 1 index */ + /* Extract level 1 index */ + rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzxr11, r10, r11 /* Get the level 1 entry */ rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ beq 2f /* If zero, don't try to find a pte */ @@ -394,8 +397,8 @@ DataStoreTLBMiss: */ mfspr r10, SPRN_MD_EPN/* Get address of fault */ /* Extract level 2 index */ - rlwinm r10, r10, 22, 20, 29 - rlwimi r10, r11, 0, 0, 19 /* Add level 2 base */ + rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 + rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ ori r11, r11, 1 /* Set valid bit in physical L2 page */ @@ -526,18 +529,20 @@ FixupDAR:/* Entry point for dcbx workaround. */ beq-3f /* Branch if user space */ lis r11, (swapper_pg_dir-PAGE_OFFSET)@h ori r11, r11, (swapper_pg_dir-PAGE_OFFSET)@l -3: rlwinm r10, r10, 12, 20, 29/* Extract level 1 index */ + /* Extract level 1 index */ +3: rlwinm r10, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 lwzxr11, r10, r11 /* Get the level 1 entry */ rlwinm r10, r11,0,0,19 /* Extract page descriptor page address */ mfspr r11, SPRN_SRR0 /* Get effective address of fault */ - rlwinm r11, r11, 22, 20, 29/* Extract level 2 index */ + /* Extract level 2 index */ + rlwinm r11, r11, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29 lwzxr11, r10, r11 /* Get the pte */ #ifdef CONFIG_8xx_CPU6 lwz r3, 8(r0) /* restore r3 from memory */ #endif /* concat physical page address(r11) and page offset(r10) */ mfspr r10, SPRN_SRR0 - rlwimi r11, r10, 0, 20, 31 + rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, @@ -913,12 +918,13 @@ set_dec_cpu6: .globl sdata sdata: .globl empty_zero_page + .align PAGE_SHIFT empty_zero_page: - .space 4096 + .space PAGE_SIZE .globl swapper_pg_dir swapper_pg_dir: - .space 4096 + .space PGD_TABLE_SIZE /* Room for two PTE table poiners, usually the kernel and current user * pointer to their respective root page table (pgdir). -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 14/19] powerpc/8xx: Const for TLB RPN forced value
Value 0x00f0 is used to force bits in TLB level 2 entry. This value is linked to the page size and will vary when we change the page size. Lets define a const for it in order to have it at only one place. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/head_8xx.S | 19 +-- 1 files changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index dcaee9f..8966262 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -40,6 +40,13 @@ #else #define DO_8xx_CPU6(val, reg) #endif + +/* + * Value for the bits that have fixed value in RPN entries. + * Also used for tagging DAR for DTLBerror. + */ +#define RPN_PATTERN0x00f0 + __HEAD _ENTRY(_stext); _ENTRY(_start); @@ -211,7 +218,7 @@ MachineCheck: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) - li r5,0x00f0 + li r5,RPN_PATTERN mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) @@ -237,7 +244,7 @@ Alignment: EXCEPTION_PROLOG mfspr r4,SPRN_DAR stw r4,_DAR(r11) - li r5,0x00f0 + li r5,RPN_PATTERN mtspr SPRN_DAR,r5 /* Tag DAR, to be used in DTLB Error */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) @@ -341,7 +348,7 @@ InstructionTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ - li r11, 0x00f0 + li r11, RPN_PATTERN rlwimi r10, r11, 0, 0x07f8 /* Set 24-27, clear 21-23,28 */ DO_8xx_CPU6(0x2d80, r3) mtspr SPRN_MI_RPN, r10/* Update TLB entry */ @@ -445,7 +452,7 @@ DataStoreTLBMiss: * set. All other Linux PTE bits control the behavior * of the MMU. */ -2: li r11, 0x00f0 +2: li r11, RPN_PATTERN rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ DO_8xx_CPU6(0x3d80, r3) mtspr SPRN_MD_RPN, r10/* Update TLB entry */ @@ -479,7 +486,7 @@ DataTLBError: EXCEPTION_PROLOG_0 mfspr r11, SPRN_DAR - cmpwi cr0, r11, 0x00f0 + cmpwi cr0, r11, RPN_PATTERN beq-FixupDAR/* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ EXCEPTION_PROLOG_1 @@ -488,7 +495,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ stw r10,_DSISR(r11) mr r5,r10 mfspr r4,SPRN_DAR - li r10,0x00f0 + li r10,RPN_PATTERN mtspr SPRN_DAR,r10/* Tag DAR, to be used in DTLB Error */ EXC_XFER_LITE(0x1400, handle_page_fault) -- 1.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev