vmx ghash buggy on ppc64le
Hi: I have received a report that ghash on ppc64le does not interoperate with other implementations of ghash, e.g., on x86-64. https://bugzilla.redhat.com/show_bug.cgi?id=1490972 Could you guys take a look at this and see if this is a bug in the mainline vmx driver too? Thanks! -- Email: Herbert XuHome Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH 00/12] x86/crypto: Fix RBP usage in several crypto .S files
On Fri, Sep 15, 2017 at 11:06:29PM +0200, Ingo Molnar wrote: > > Indeed, I suspect they should go through the crypto tree, these fixes are > independent, they don't depend on anything in the x86 tree. Sure I can pick them up through cryptodev. Thanks, -- Email: Herbert XuHome Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
[PATCH v8 4/7] iomap: introduce io{read|write}64_{lo_hi|hi_lo}
In order to provide non-atomic functions for io{read|write}64 that will use readq and writeq when appropriate. We define a number of variants of these functions in the generic iomap that will do non-atomic operations on pio but atomic operations on mmio. These functions are only defined if readq and writeq are defined. If they are not, then the wrappers that always use non-atomic operations from include/linux/io-64-nonatomic*.h will be used. Signed-off-by: Logan GunthorpeReviewed-by: Andy Shevchenko Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Arnd Bergmann Cc: Suresh Warrier Cc: Nicholas Piggin --- arch/powerpc/include/asm/io.h | 2 + include/asm-generic/iomap.h | 26 +++-- lib/iomap.c | 132 ++ 3 files changed, 154 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index af074923d598..4cc420cfaa78 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -788,8 +788,10 @@ extern void __iounmap_at(void *ea, unsigned long size); #define mmio_read16be(addr)readw_be(addr) #define mmio_read32be(addr)readl_be(addr) +#define mmio_read64be(addr)readq_be(addr) #define mmio_write16be(val, addr) writew_be(val, addr) #define mmio_write32be(val, addr) writel_be(val, addr) +#define mmio_write64be(val, addr) writeq_be(val, addr) #define mmio_insb(addr, dst, count)readsb(addr, dst, count) #define mmio_insw(addr, dst, count)readsw(addr, dst, count) #define mmio_insl(addr, dst, count)readsl(addr, dst, count) diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 650fede33c25..30edebf627fe 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -30,9 +30,16 @@ extern unsigned int ioread16(void __iomem *); extern unsigned int ioread16be(void __iomem *); extern unsigned int ioread32(void __iomem *); extern unsigned int ioread32be(void __iomem *); -#ifdef CONFIG_64BIT -extern u64 ioread64(void __iomem *); -extern u64 ioread64be(void __iomem *); + +#ifdef readq +#define ioread64_lo_hi ioread64_lo_hi +#define ioread64_hi_lo ioread64_hi_lo +#define ioread64be_lo_hi ioread64be_lo_hi +#define ioread64be_hi_lo ioread64be_hi_lo +extern u64 ioread64_lo_hi(void __iomem *addr); +extern u64 ioread64_hi_lo(void __iomem *addr); +extern u64 ioread64be_lo_hi(void __iomem *addr); +extern u64 ioread64be_hi_lo(void __iomem *addr); #endif extern void iowrite8(u8, void __iomem *); @@ -40,9 +47,16 @@ extern void iowrite16(u16, void __iomem *); extern void iowrite16be(u16, void __iomem *); extern void iowrite32(u32, void __iomem *); extern void iowrite32be(u32, void __iomem *); -#ifdef CONFIG_64BIT -extern void iowrite64(u64, void __iomem *); -extern void iowrite64be(u64, void __iomem *); + +#ifdef writeq +#define iowrite64_lo_hi iowrite64_lo_hi +#define iowrite64_hi_lo iowrite64_hi_lo +#define iowrite64be_lo_hi iowrite64be_lo_hi +#define iowrite64be_hi_lo iowrite64be_hi_lo +extern void iowrite64_lo_hi(u64 val, void __iomem *addr); +extern void iowrite64_hi_lo(u64 val, void __iomem *addr); +extern void iowrite64be_lo_hi(u64 val, void __iomem *addr); +extern void iowrite64be_hi_lo(u64 val, void __iomem *addr); #endif /* diff --git a/lib/iomap.c b/lib/iomap.c index fc3dcb4b238e..845b9c41082c 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -66,6 +66,7 @@ static void bad_io_access(unsigned long port, const char *access) #ifndef mmio_read16be #define mmio_read16be(addr) be16_to_cpu(__raw_readw(addr)) #define mmio_read32be(addr) be32_to_cpu(__raw_readl(addr)) +#define mmio_read64be(addr) be64_to_cpu(__raw_readq(addr)) #endif unsigned int ioread8(void __iomem *addr) @@ -99,6 +100,80 @@ EXPORT_SYMBOL(ioread16be); EXPORT_SYMBOL(ioread32); EXPORT_SYMBOL(ioread32be); +#ifdef readq +static u64 pio_read64_lo_hi(unsigned long port) +{ + u64 lo, hi; + + lo = inl(port); + hi = inl(port + sizeof(u32)); + + return lo | (hi << 32); +} + +static u64 pio_read64_hi_lo(unsigned long port) +{ + u64 lo, hi; + + hi = inl(port + sizeof(u32)); + lo = inl(port); + + return lo | (hi << 32); +} + +static u64 pio_read64be_lo_hi(unsigned long port) +{ + u64 lo, hi; + + lo = pio_read32be(port + sizeof(u32)); + hi = pio_read32be(port); + + return lo | (hi << 32); +} + +static u64 pio_read64be_hi_lo(unsigned long port) +{ + u64 lo, hi; + + hi = pio_read32be(port); + lo = pio_read32be(port + sizeof(u32)); + + return lo | (hi << 32); +} + +u64 ioread64_lo_hi(void __iomem *addr) +{ + IO_COND(addr, return pio_read64_lo_hi(port), return readq(addr)); + return 0xULL; +} + +u64
[PATCH v8 5/7] io-64-nonatomic: add io{read|write}64[be]{_lo_hi|_hi_lo} macros
This patch adds generic io{read|write}64[be]{_lo_hi|_hi_lo} macros if they are not already defined by the architecture. (As they are provided by the generic iomap library). The patch also points io{read|write}64[be] to the variant specified by the header name. This is because new drivers are encouraged to use ioreadXX, et al instead of readX[1], et al -- and mixing ioreadXX with readq is pretty ugly. [1] LDD3: section 9.4.2 Signed-off-by: Logan GunthorpeReviewed-by: Andy Shevchenko Cc: Christoph Hellwig Cc: Arnd Bergmann Cc: Alan Cox Cc: Greg Kroah-Hartman --- include/linux/io-64-nonatomic-hi-lo.h | 64 +++ include/linux/io-64-nonatomic-lo-hi.h | 64 +++ 2 files changed, 128 insertions(+) diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h index defcc4644ce3..410c8b177080 100644 --- a/include/linux/io-64-nonatomic-hi-lo.h +++ b/include/linux/io-64-nonatomic-hi-lo.h @@ -54,4 +54,68 @@ static inline void hi_lo_writeq_relaxed(__u64 val, volatile void __iomem *addr) #define writeq_relaxed hi_lo_writeq_relaxed #endif +#ifndef ioread64_hi_lo +#define ioread64_hi_lo ioread64_hi_lo +static inline u64 ioread64_hi_lo(void __iomem *addr) +{ + u32 low, high; + + high = ioread32(addr + sizeof(u32)); + low = ioread32(addr); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64_hi_lo +#define iowrite64_hi_lo iowrite64_hi_lo +static inline void iowrite64_hi_lo(u64 val, void __iomem *addr) +{ + iowrite32(val >> 32, addr + sizeof(u32)); + iowrite32(val, addr); +} +#endif + +#ifndef ioread64be_hi_lo +#define ioread64be_hi_lo ioread64be_hi_lo +static inline u64 ioread64be_hi_lo(void __iomem *addr) +{ + u32 low, high; + + high = ioread32be(addr); + low = ioread32be(addr + sizeof(u32)); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64be_hi_lo +#define iowrite64be_hi_lo iowrite64be_hi_lo +static inline void iowrite64be_hi_lo(u64 val, void __iomem *addr) +{ + iowrite32be(val >> 32, addr); + iowrite32be(val, addr + sizeof(u32)); +} +#endif + +#ifndef ioread64 +#define ioread64_is_nonatomic +#define ioread64 ioread64_hi_lo +#endif + +#ifndef iowrite64 +#define iowrite64_is_nonatomic +#define iowrite64 iowrite64_hi_lo +#endif + +#ifndef ioread64be +#define ioread64be_is_nonatomic +#define ioread64be ioread64be_hi_lo +#endif + +#ifndef iowrite64be +#define iowrite64be_is_nonatomic +#define iowrite64be iowrite64be_hi_lo +#endif + #endif /* _LINUX_IO_64_NONATOMIC_HI_LO_H_ */ diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h index 084461a4e5ab..acba36812be8 100644 --- a/include/linux/io-64-nonatomic-lo-hi.h +++ b/include/linux/io-64-nonatomic-lo-hi.h @@ -54,4 +54,68 @@ static inline void lo_hi_writeq_relaxed(__u64 val, volatile void __iomem *addr) #define writeq_relaxed lo_hi_writeq_relaxed #endif +#ifndef ioread64_lo_hi +#define ioread64_lo_hi ioread64_lo_hi +static inline u64 ioread64_lo_hi(void __iomem *addr) +{ + u32 low, high; + + low = ioread32(addr); + high = ioread32(addr + sizeof(u32)); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64_lo_hi +#define iowrite64_lo_hi iowrite64_lo_hi +static inline void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + iowrite32(val, addr); + iowrite32(val >> 32, addr + sizeof(u32)); +} +#endif + +#ifndef ioread64be_lo_hi +#define ioread64be_lo_hi ioread64be_lo_hi +static inline u64 ioread64be_lo_hi(void __iomem *addr) +{ + u32 low, high; + + low = ioread32be(addr + sizeof(u32)); + high = ioread32be(addr); + + return low + ((u64)high << 32); +} +#endif + +#ifndef iowrite64be_lo_hi +#define iowrite64be_lo_hi iowrite64be_lo_hi +static inline void iowrite64be_lo_hi(u64 val, void __iomem *addr) +{ + iowrite32be(val, addr + sizeof(u32)); + iowrite32be(val >> 32, addr); +} +#endif + +#ifndef ioread64 +#define ioread64_is_nonatomic +#define ioread64 ioread64_lo_hi +#endif + +#ifndef iowrite64 +#define iowrite64_is_nonatomic +#define iowrite64 iowrite64_lo_hi +#endif + +#ifndef ioread64be +#define ioread64be_is_nonatomic +#define ioread64be ioread64be_lo_hi +#endif + +#ifndef iowrite64be +#define iowrite64be_is_nonatomic +#define iowrite64be iowrite64be_lo_hi +#endif + #endif /* _LINUX_IO_64_NONATOMIC_LO_HI_H_ */ -- 2.11.0
[PATCH v8 2/7] powerpc: io.h: move iomap.h include so that it can use readq/writeq defs
Subsequent patches in this series makes use of the readq and writeq defines in iomap.h. However, as is, they get missed on the powerpc platform seeing the include comes before the define. This patch moves the include down to fix this. Signed-off-by: Logan GunthorpeAcked-By: Michael Ellerman Reviewed-by: Andy Shevchenko Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Suresh Warrier Cc: "Oliver O'Halloran" --- arch/powerpc/include/asm/io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 422f99cf9924..af074923d598 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -33,8 +33,6 @@ extern struct pci_dev *isa_bridge_pcidev; #include #include -#include - #ifdef CONFIG_PPC64 #include #endif @@ -663,6 +661,8 @@ static inline void name at \ #define writel_relaxed(v, addr)writel(v, addr) #define writeq_relaxed(v, addr)writeq(v, addr) +#include + #ifdef CONFIG_PPC32 #define mmiowb() #else -- 2.11.0
[PATCH v8 1/7] drm/tilcdc: ensure nonatomic iowrite64 is not used
Add a check to ensure iowrite64 is only used if it is atomic. It was decided in [1] that the tilcdc driver should not be using an atomic operation (so it was left out of this patchset). However, it turns out that through the drm code, a nonatomic header is actually included: include/linux/io-64-nonatomic-lo-hi.h is included from include/drm/drm_os_linux.h:9:0, from include/drm/drmP.h:74, from include/drm/drm_modeset_helper.h:26, from include/drm/drm_atomic_helper.h:33, from drivers/gpu/drm/tilcdc/tilcdc_crtc.c:19: And thus, without this change, this patchset would inadvertantly change the behaviour of the tilcdc driver. [1] lkml.kernel.org/r/cak8p3a2hho_zcnstzq7hmwsz5la5thu19fwzpun16imnyyn...@mail.gmail.com Signed-off-by: Logan GunthorpeReviewed-by: Andy Shevchenko Cc: Jyri Sarha Cc: Arnd Bergmann Cc: Tomi Valkeinen Cc: David Airlie --- drivers/gpu/drm/tilcdc/tilcdc_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tilcdc/tilcdc_regs.h b/drivers/gpu/drm/tilcdc/tilcdc_regs.h index 9d528c0a67a4..5048ebb86835 100644 --- a/drivers/gpu/drm/tilcdc/tilcdc_regs.h +++ b/drivers/gpu/drm/tilcdc/tilcdc_regs.h @@ -133,7 +133,7 @@ static inline void tilcdc_write64(struct drm_device *dev, u32 reg, u64 data) struct tilcdc_drm_private *priv = dev->dev_private; volatile void __iomem *addr = priv->mmio + reg; -#ifdef iowrite64 +#if defined(iowrite64) && !defined(iowrite64_is_nonatomic) iowrite64(data, addr); #else __iowmb(); -- 2.11.0
[PATCH v8 3/7] powerpc: iomap.c: introduce io{read|write}64_{lo_hi|hi_lo}
These functions will be introduced into the generic iomap.c so they can deal with PIO accesses in hi-lo/lo-hi variants. Thus, the powerpc version of iomap.c will need to provide the same functions even though, in this arch, they are identical to the regular io{read|write}64 functions. Signed-off-by: Logan GunthorpeTested-by: Horia Geantă Reviewed-by: Andy Shevchenko Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman --- arch/powerpc/kernel/iomap.c | 40 1 file changed, 40 insertions(+) diff --git a/arch/powerpc/kernel/iomap.c b/arch/powerpc/kernel/iomap.c index a1854d1ded8b..b43dbadfd24f 100644 --- a/arch/powerpc/kernel/iomap.c +++ b/arch/powerpc/kernel/iomap.c @@ -44,12 +44,32 @@ u64 ioread64(void __iomem *addr) { return readq(addr); } +u64 ioread64_lo_hi(void __iomem *addr) +{ + return readq(addr); +} +u64 ioread64_hi_lo(void __iomem *addr) +{ + return readq(addr); +} u64 ioread64be(void __iomem *addr) { return readq_be(addr); } +u64 ioread64be_lo_hi(void __iomem *addr) +{ + return readq_be(addr); +} +u64 ioread64be_hi_lo(void __iomem *addr) +{ + return readq_be(addr); +} EXPORT_SYMBOL(ioread64); +EXPORT_SYMBOL(ioread64_lo_hi); +EXPORT_SYMBOL(ioread64_hi_lo); EXPORT_SYMBOL(ioread64be); +EXPORT_SYMBOL(ioread64be_lo_hi); +EXPORT_SYMBOL(ioread64be_hi_lo); #endif /* __powerpc64__ */ void iowrite8(u8 val, void __iomem *addr) @@ -82,12 +102,32 @@ void iowrite64(u64 val, void __iomem *addr) { writeq(val, addr); } +void iowrite64_lo_hi(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} +void iowrite64_hi_lo(u64 val, void __iomem *addr) +{ + writeq(val, addr); +} void iowrite64be(u64 val, void __iomem *addr) { writeq_be(val, addr); } +void iowrite64be_lo_hi(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} +void iowrite64be_hi_lo(u64 val, void __iomem *addr) +{ + writeq_be(val, addr); +} EXPORT_SYMBOL(iowrite64); +EXPORT_SYMBOL(iowrite64_lo_hi); +EXPORT_SYMBOL(iowrite64_hi_lo); EXPORT_SYMBOL(iowrite64be); +EXPORT_SYMBOL(iowrite64be_lo_hi); +EXPORT_SYMBOL(iowrite64be_hi_lo); #endif /* __powerpc64__ */ /* -- 2.11.0
[PATCH v8 0/7] make io{read|write}64 globally usable
This is version eight of my patchset to enable drivers to use io{read|write}64 on all arches. -- Changes since v7: - Fix minor nits from Andy Shevchenko - Rebased onto v4.14-rc1 Changes since v6: ** none ** Changes since v5: - Added a fix to the tilcdc driver to ensure it doesn't use the non-atomic operation. (This includes adding io{read|write}64[be]_is_nonatomic defines). Changes since v4: - Add functions so the powerpc implementation of iomap.c compiles. (As noticed by Horia) Changes since v3: - I noticed powerpc didn't use the appropriate functions seeing readq/writeq were not defined when iomap.h was included. Thus I've included a patch to adjust this - Fixed some mistakes with a couple of the defines in io-64-nonatomic* headers - Fixed a typo noticed by Horia. (earlier versions were drastically different) -- Horia Geantă (1): crypto: caam: cleanup CONFIG_64BIT ifdefs when using io{read|write}64 Logan Gunthorpe (6): drm/tilcdc: ensure nonatomic iowrite64 is not used powerpc: io.h: move iomap.h include so that it can use readq/writeq defs powerpc: iomap.c: introduce io{read|write}64_{lo_hi|hi_lo} iomap: introduce io{read|write}64_{lo_hi|hi_lo} io-64-nonatomic: add io{read|write}64[be]{_lo_hi|_hi_lo} macros ntb: ntb_hw_intel: use io-64-nonatomic instead of in-driver hacks arch/powerpc/include/asm/io.h | 6 +- arch/powerpc/kernel/iomap.c | 40 +++ drivers/crypto/caam/regs.h| 35 ++--- drivers/gpu/drm/tilcdc/tilcdc_regs.h | 2 +- drivers/ntb/hw/intel/ntb_hw_intel.c | 30 +--- include/asm-generic/iomap.h | 26 +-- include/linux/io-64-nonatomic-hi-lo.h | 64 + include/linux/io-64-nonatomic-lo-hi.h | 64 + lib/iomap.c | 132 ++ 9 files changed, 331 insertions(+), 68 deletions(-) -- 2.11.0
[PATCH v8 6/7] ntb: ntb_hw_intel: use io-64-nonatomic instead of in-driver hacks
Now that ioread64 and iowrite64 are available in io-64-nonatomic, we can remove the hack at the top of ntb_hw_intel.c and replace it with an include. Signed-off-by: Logan GunthorpeReviewed-by: Andy Shevchenko Acked-by: Dave Jiang Acked-by: Allen Hubbe Acked-by: Jon Mason --- drivers/ntb/hw/intel/ntb_hw_intel.c | 30 +- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/drivers/ntb/hw/intel/ntb_hw_intel.c b/drivers/ntb/hw/intel/ntb_hw_intel.c index 2557e2c05b90..606c90f59d4b 100644 --- a/drivers/ntb/hw/intel/ntb_hw_intel.c +++ b/drivers/ntb/hw/intel/ntb_hw_intel.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "ntb_hw_intel.h" @@ -155,35 +156,6 @@ MODULE_PARM_DESC(xeon_b2b_dsd_bar5_addr32, static inline enum ntb_topo xeon_ppd_topo(struct intel_ntb_dev *ndev, u8 ppd); static int xeon_init_isr(struct intel_ntb_dev *ndev); -#ifndef ioread64 -#ifdef readq -#define ioread64 readq -#else -#define ioread64 _ioread64 -static inline u64 _ioread64(void __iomem *mmio) -{ - u64 low, high; - - low = ioread32(mmio); - high = ioread32(mmio + sizeof(u32)); - return low | (high << 32); -} -#endif -#endif - -#ifndef iowrite64 -#ifdef writeq -#define iowrite64 writeq -#else -#define iowrite64 _iowrite64 -static inline void _iowrite64(u64 val, void __iomem *mmio) -{ - iowrite32(val, mmio); - iowrite32(val >> 32, mmio + sizeof(u32)); -} -#endif -#endif - static inline int pdev_is_atom(struct pci_dev *pdev) { switch (pdev->device) { -- 2.11.0
[PATCH v8 7/7] crypto: caam: cleanup CONFIG_64BIT ifdefs when using io{read|write}64
From: Horia GeantăWe can now make use of the io-64-nonatomic-lo-hi header to always provide 64 bit IO operations. So this patch cleans up the extra CONFIG_64BIT ifdefs. To be consistent with CAAM engine HW spec: in case of 64-bit registers, irrespective of device endianness, the lower address should be read from / written to first, followed by the upper address. Indeed the I/O accessors in CAAM driver currently don't follow the spec, however this is a good opportunity to fix the code. Signed-off-by: Horia Geantă Signed-off-by: Logan Gunthorpe Reviewed-by: Andy Shevchenko Cc: Horia Geantă Cc: Dan Douglass Cc: Herbert Xu Cc: "David S. Miller" --- drivers/crypto/caam/regs.h | 35 +-- 1 file changed, 5 insertions(+), 30 deletions(-) diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 2b5efff9ec3c..868ae7fb7c9f 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -9,7 +9,7 @@ #include #include -#include +#include /* * Architecture-specific register access methods @@ -134,50 +134,25 @@ static inline void clrsetbits_32(void __iomem *reg, u32 clear, u32 set) *base + 0x : least-significant 32 bits *base + 0x0004 : most-significant 32 bits */ -#ifdef CONFIG_64BIT static inline void wr_reg64(void __iomem *reg, u64 data) { +#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX if (caam_little_end) iowrite64(data, reg); else - iowrite64be(data, reg); -} - -static inline u64 rd_reg64(void __iomem *reg) -{ - if (caam_little_end) - return ioread64(reg); - else - return ioread64be(reg); -} - -#else /* CONFIG_64BIT */ -static inline void wr_reg64(void __iomem *reg, u64 data) -{ -#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX - if (caam_little_end) { - wr_reg32((u32 __iomem *)(reg) + 1, data >> 32); - wr_reg32((u32 __iomem *)(reg), data); - } else #endif - { - wr_reg32((u32 __iomem *)(reg), data >> 32); - wr_reg32((u32 __iomem *)(reg) + 1, data); - } + iowrite64be(data, reg); } static inline u64 rd_reg64(void __iomem *reg) { #ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX if (caam_little_end) - return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 | - (u64)rd_reg32((u32 __iomem *)(reg))); + return ioread64(reg); else #endif - return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 | - (u64)rd_reg32((u32 __iomem *)(reg) + 1)); + return ioread64be(reg); } -#endif /* CONFIG_64BIT */ #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT #ifdef CONFIG_SOC_IMX7D -- 2.11.0
[PATCH v2 01/12] x86/crypto: Fix RBP usage in blowfish-x86_64-asm_64.S
Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use R12 instead of RBP. R12 can't be used as the RT0 register because of x86 instruction encoding limitations. So use R12 for CTX and RDI for CTX. This means that CTX is no longer an implicit function argument. Instead it needs to be explicitly copied from RDI. Reported-by: Eric BiggersReported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf --- arch/x86/crypto/blowfish-x86_64-asm_64.S | 48 +--- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S index 246c67006ed0..8c1fcb6bad21 100644 --- a/arch/x86/crypto/blowfish-x86_64-asm_64.S +++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S @@ -33,7 +33,7 @@ #define s3 ((16 + 2 + (3 * 256)) * 4) /* register macros */ -#define CTX %rdi +#define CTX %r12 #define RIO %rsi #define RX0 %rax @@ -56,12 +56,12 @@ #define RX2bh %ch #define RX3bh %dh -#define RT0 %rbp +#define RT0 %rdi #define RT1 %rsi #define RT2 %r8 #define RT3 %r9 -#define RT0d %ebp +#define RT0d %edi #define RT1d %esi #define RT2d %r8d #define RT3d %r9d @@ -120,13 +120,14 @@ ENTRY(__blowfish_enc_blk) /* input: -* %rdi: ctx, CTX +* %rdi: ctx * %rsi: dst * %rdx: src * %rcx: bool, if true: xor output */ - movq %rbp, %r11; + movq %r12, %r11; + movq %rdi, CTX; movq %rsi, %r10; movq %rdx, RIO; @@ -142,7 +143,7 @@ ENTRY(__blowfish_enc_blk) round_enc(14); add_roundkey_enc(16); - movq %r11, %rbp; + movq %r11, %r12; movq %r10, RIO; test %cl, %cl; @@ -157,12 +158,13 @@ ENDPROC(__blowfish_enc_blk) ENTRY(blowfish_dec_blk) /* input: -* %rdi: ctx, CTX +* %rdi: ctx * %rsi: dst * %rdx: src */ - movq %rbp, %r11; + movq %r12, %r11; + movq %rdi, CTX; movq %rsi, %r10; movq %rdx, RIO; @@ -181,7 +183,7 @@ ENTRY(blowfish_dec_blk) movq %r10, RIO; write_block(); - movq %r11, %rbp; + movq %r11, %r12; ret; ENDPROC(blowfish_dec_blk) @@ -298,20 +300,21 @@ ENDPROC(blowfish_dec_blk) ENTRY(__blowfish_enc_blk_4way) /* input: -* %rdi: ctx, CTX +* %rdi: ctx * %rsi: dst * %rdx: src * %rcx: bool, if true: xor output */ - pushq %rbp; + pushq %r12; pushq %rbx; pushq %rcx; - preload_roundkey_enc(0); - + movq %rdi, CTX movq %rsi, %r11; movq %rdx, RIO; + preload_roundkey_enc(0); + read_block4(); round_enc4(0); @@ -324,39 +327,40 @@ ENTRY(__blowfish_enc_blk_4way) round_enc4(14); add_preloaded_roundkey4(); - popq %rbp; + popq %r12; movq %r11, RIO; - test %bpl, %bpl; + test %r12b, %r12b; jnz .L__enc_xor4; write_block4(); popq %rbx; - popq %rbp; + popq %r12; ret; .L__enc_xor4: xor_block4(); popq %rbx; - popq %rbp; + popq %r12; ret; ENDPROC(__blowfish_enc_blk_4way) ENTRY(blowfish_dec_blk_4way) /* input: -* %rdi: ctx, CTX +* %rdi: ctx * %rsi: dst * %rdx: src */ - pushq %rbp; + pushq %r12; pushq %rbx; - preload_roundkey_dec(17); - movq %rsi, %r11; + movq %rdi, CTX; + movq %rsi, %r11 movq %rdx, RIO; + preload_roundkey_dec(17); read_block4(); round_dec4(17); @@ -373,7 +377,7 @@ ENTRY(blowfish_dec_blk_4way) write_block4(); popq %rbx; - popq %rbp; + popq %r12; ret; ENDPROC(blowfish_dec_blk_4way) -- 2.13.5
[PATCH v2 04/12] x86/crypto: Fix RBP usage in cast6-avx-x86_64-asm_64.S
Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use R15 instead of RBP. R15 can't be used as the RID1 register because of x86 instruction encoding limitations. So use R15 for CTX and RDI for CTX. This means that CTX is no longer an implicit function argument. Instead it needs to be explicitly copied from RDI. Reported-by: Eric BiggersReported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf --- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 50 +-- 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index 952d3156a933..7f30b6f0d72c 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -47,7 +47,7 @@ /** 8-way AVX cast6 **/ -#define CTX %rdi +#define CTX %r15 #define RA1 %xmm0 #define RB1 %xmm1 @@ -70,8 +70,8 @@ #define RTMP %xmm15 -#define RID1 %rbp -#define RID1d %ebp +#define RID1 %rdi +#define RID1d %edi #define RID2 %rsi #define RID2d %esi @@ -264,15 +264,17 @@ .align 8 __cast6_enc_blk8: /* input: -* %rdi: ctx, CTX +* %rdi: ctx * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks * output: * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks */ - pushq %rbp; + pushq %r15; pushq %rbx; + movq %rdi, CTX; + vmovdqa .Lbswap_mask, RKM; vmovd .Lfirst_mask, R1ST; vmovd .L32_mask, R32; @@ -297,7 +299,7 @@ __cast6_enc_blk8: QBAR(11); popq %rbx; - popq %rbp; + popq %r15; vmovdqa .Lbswap_mask, RKM; @@ -310,15 +312,17 @@ ENDPROC(__cast6_enc_blk8) .align 8 __cast6_dec_blk8: /* input: -* %rdi: ctx, CTX +* %rdi: ctx * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks * output: * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: decrypted blocks */ - pushq %rbp; + pushq %r15; pushq %rbx; + movq %rdi, CTX; + vmovdqa .Lbswap_mask, RKM; vmovd .Lfirst_mask, R1ST; vmovd .L32_mask, R32; @@ -343,7 +347,7 @@ __cast6_dec_blk8: QBAR(0); popq %rbx; - popq %rbp; + popq %r15; vmovdqa .Lbswap_mask, RKM; outunpack_blocks(RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM); @@ -354,12 +358,14 @@ ENDPROC(__cast6_dec_blk8) ENTRY(cast6_ecb_enc_8way) /* input: -* %rdi: ctx, CTX +* %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); @@ -368,18 +374,21 @@ ENTRY(cast6_ecb_enc_8way) store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; FRAME_END ret; ENDPROC(cast6_ecb_enc_8way) ENTRY(cast6_ecb_dec_8way) /* input: -* %rdi: ctx, CTX +* %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; load_8way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); @@ -388,20 +397,22 @@ ENTRY(cast6_ecb_dec_8way) store_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; FRAME_END ret; ENDPROC(cast6_ecb_dec_8way) ENTRY(cast6_cbc_dec_8way) /* input: -* %rdi: ctx, CTX +* %rdi: ctx * %rsi: dst * %rdx: src */ FRAME_BEGIN - pushq %r12; + pushq %r15; + movq %rdi, CTX; movq %rsi, %r11; movq %rdx, %r12; @@ -411,8 +422,8 @@ ENTRY(cast6_cbc_dec_8way) store_cbc_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; popq %r12; - FRAME_END ret; ENDPROC(cast6_cbc_dec_8way) @@ -425,9 +436,10 @@ ENTRY(cast6_ctr_8way) * %rcx: iv (little endian, 128bit) */ FRAME_BEGIN - pushq %r12; + pushq %r15 + movq %rdi, CTX; movq %rsi, %r11; movq %rdx, %r12; @@ -438,8 +450,8 @@ ENTRY(cast6_ctr_8way) store_ctr_8way(%r12, %r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2); + popq %r15; popq %r12; - FRAME_END ret; ENDPROC(cast6_ctr_8way) @@ -452,7 +464,9 @@ ENTRY(cast6_xts_enc_8way) * %rcx: iv (t ⊕ αⁿ ∈
[PATCH v2 02/12] x86/crypto: Fix RBP usage in camellia-x86_64-asm_64.S
Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use R12 instead of RBP. Both are callee-saved registers, so the substitution is straightforward. Reported-by: Eric BiggersReported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf --- arch/x86/crypto/camellia-x86_64-asm_64.S | 26 +- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S index 310319c601ed..95ba6956a7f6 100644 --- a/arch/x86/crypto/camellia-x86_64-asm_64.S +++ b/arch/x86/crypto/camellia-x86_64-asm_64.S @@ -75,17 +75,17 @@ #define RCD1bh %dh #define RT0 %rsi -#define RT1 %rbp +#define RT1 %r12 #define RT2 %r8 #define RT0d %esi -#define RT1d %ebp +#define RT1d %r12d #define RT2d %r8d #define RT2bl %r8b #define RXOR %r9 -#define RRBP %r10 +#define RR12 %r10 #define RDST %r11 #define RXORd %r9d @@ -197,7 +197,7 @@ ENTRY(__camellia_enc_blk) * %rdx: src * %rcx: bool xor */ - movq %rbp, RRBP; + movq %r12, RR12; movq %rcx, RXOR; movq %rsi, RDST; @@ -227,13 +227,13 @@ ENTRY(__camellia_enc_blk) enc_outunpack(mov, RT1); - movq RRBP, %rbp; + movq RR12, %r12; ret; .L__enc_xor: enc_outunpack(xor, RT1); - movq RRBP, %rbp; + movq RR12, %r12; ret; ENDPROC(__camellia_enc_blk) @@ -248,7 +248,7 @@ ENTRY(camellia_dec_blk) movl $24, RXORd; cmovel RXORd, RT2d; /* max */ - movq %rbp, RRBP; + movq %r12, RR12; movq %rsi, RDST; movq %rdx, RIO; @@ -271,7 +271,7 @@ ENTRY(camellia_dec_blk) dec_outunpack(); - movq RRBP, %rbp; + movq RR12, %r12; ret; ENDPROC(camellia_dec_blk) @@ -433,7 +433,7 @@ ENTRY(__camellia_enc_blk_2way) */ pushq %rbx; - movq %rbp, RRBP; + movq %r12, RR12; movq %rcx, RXOR; movq %rsi, RDST; movq %rdx, RIO; @@ -461,14 +461,14 @@ ENTRY(__camellia_enc_blk_2way) enc_outunpack2(mov, RT2); - movq RRBP, %rbp; + movq RR12, %r12; popq %rbx; ret; .L__enc2_xor: enc_outunpack2(xor, RT2); - movq RRBP, %rbp; + movq RR12, %r12; popq %rbx; ret; ENDPROC(__camellia_enc_blk_2way) @@ -485,7 +485,7 @@ ENTRY(camellia_dec_blk_2way) cmovel RXORd, RT2d; /* max */ movq %rbx, RXOR; - movq %rbp, RRBP; + movq %r12, RR12; movq %rsi, RDST; movq %rdx, RIO; @@ -508,7 +508,7 @@ ENTRY(camellia_dec_blk_2way) dec_outunpack2(); - movq RRBP, %rbp; + movq RR12, %r12; movq RXOR, %rbx; ret; ENDPROC(camellia_dec_blk_2way) -- 2.13.5
[PATCH v2 05/12] x86/crypto: Fix RBP usage in des3_ede-asm_64.S
Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use RSI instead of RBP for RT1. Since RSI is also used as a the 'dst' function argument, it needs to be saved on the stack until the argument is needed. Reported-by: Eric BiggersReported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf --- arch/x86/crypto/des3_ede-asm_64.S | 15 +-- 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index f3e91647ca27..8e49ce117494 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -64,12 +64,12 @@ #define RW2bh %ch #define RT0 %r15 -#define RT1 %rbp +#define RT1 %rsi #define RT2 %r14 #define RT3 %rdx #define RT0d %r15d -#define RT1d %ebp +#define RT1d %esi #define RT2d %r14d #define RT3d %edx @@ -177,13 +177,14 @@ ENTRY(des3_ede_x86_64_crypt_blk) * %rsi: dst * %rdx: src */ - pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %r15; + pushq %rsi; /* dst */ + read_block(%rdx, RL0, RR0); initial_permutation(RL0, RR0); @@ -241,6 +242,8 @@ ENTRY(des3_ede_x86_64_crypt_blk) round1(32+15, RL0, RR0, dummy2); final_permutation(RR0, RL0); + + popq %rsi /* dst */ write_block(%rsi, RR0, RL0); popq %r15; @@ -248,7 +251,6 @@ ENTRY(des3_ede_x86_64_crypt_blk) popq %r13; popq %r12; popq %rbx; - popq %rbp; ret; ENDPROC(des3_ede_x86_64_crypt_blk) @@ -432,13 +434,14 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) * %rdx: src (3 blocks) */ - pushq %rbp; pushq %rbx; pushq %r12; pushq %r13; pushq %r14; pushq %r15; + pushq %rsi /* dst */ + /* load input */ movl 0 * 4(%rdx), RL0d; movl 1 * 4(%rdx), RR0d; @@ -520,6 +523,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) bswapl RR2d; bswapl RL2d; + popq %rsi /* dst */ movl RR0d, 0 * 4(%rsi); movl RL0d, 1 * 4(%rsi); movl RR1d, 2 * 4(%rsi); @@ -532,7 +536,6 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) popq %r13; popq %r12; popq %rbx; - popq %rbp; ret; ENDPROC(des3_ede_x86_64_crypt_blk_3way) -- 2.13.5
[PATCH v2 09/12] x86/crypto: Fix RBP usage in sha256-avx2-asm.S
Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. There's no need to use RBP as a temporary register for the TBL value, because it always stores the same value: the address of the K256 table. Instead just reference the address of K256 directly. Reported-by: Eric BiggersReported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf --- arch/x86/crypto/sha256-avx2-asm.S | 22 +++--- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 89c8f09787d2..1420db15dcdd 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -98,8 +98,6 @@ d = %r8d e = %edx # clobbers NUM_BLKS y3 = %esi # clobbers INP - -TBL= %rbp SRND = CTX # SRND is same register as CTX a = %eax @@ -531,7 +529,6 @@ STACK_SIZE = _RSP + _RSP_SIZE ENTRY(sha256_transform_rorx) .align 32 pushq %rbx - pushq %rbp pushq %r12 pushq %r13 pushq %r14 @@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx) mov CTX, _CTX(%rsp) loop0: - lea K256(%rip), TBL - ## Load first 16 dwords from two blocks VMOVDQ 0*32(INP),XTMP0 VMOVDQ 1*32(INP),XTMP1 @@ -597,19 +592,19 @@ last_block_enter: .align 16 loop1: - vpaddd 0*32(TBL, SRND), X0, XFER + vpaddd K256+0*32(SRND), X0, XFER vmovdqa XFER, 0*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 0*32 - vpaddd 1*32(TBL, SRND), X0, XFER + vpaddd K256+1*32(SRND), X0, XFER vmovdqa XFER, 1*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 1*32 - vpaddd 2*32(TBL, SRND), X0, XFER + vpaddd K256+2*32(SRND), X0, XFER vmovdqa XFER, 2*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 2*32 - vpaddd 3*32(TBL, SRND), X0, XFER + vpaddd K256+3*32(SRND), X0, XFER vmovdqa XFER, 3*32+_XFER(%rsp, SRND) FOUR_ROUNDS_AND_SCHED _XFER + 3*32 @@ -619,10 +614,11 @@ loop1: loop2: ## Do last 16 rounds with no scheduling - vpaddd 0*32(TBL, SRND), X0, XFER + vpaddd K256+0*32(SRND), X0, XFER vmovdqa XFER, 0*32+_XFER(%rsp, SRND) DO_4ROUNDS _XFER + 0*32 - vpaddd 1*32(TBL, SRND), X1, XFER + + vpaddd K256+1*32(SRND), X1, XFER vmovdqa XFER, 1*32+_XFER(%rsp, SRND) DO_4ROUNDS _XFER + 1*32 add $2*32, SRND @@ -676,9 +672,6 @@ loop3: ja done_hash do_last_block: - do last block - lea K256(%rip), TBL - VMOVDQ 0*16(INP),XWORD0 VMOVDQ 1*16(INP),XWORD1 VMOVDQ 2*16(INP),XWORD2 @@ -718,7 +711,6 @@ done_hash: popq%r14 popq%r13 popq%r12 - popq%rbp popq%rbx ret ENDPROC(sha256_transform_rorx) -- 2.13.5
[PATCH v2 07/12] x86/crypto: Fix RBP usage in sha1_ssse3_asm.S
Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Swap the usages of R12 and RBP. Use R12 for the REG_D register, and use RBP to store the pre-aligned stack pointer. Reported-by: Eric BiggersReported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf --- arch/x86/crypto/sha1_ssse3_asm.S | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index a4109506a5e8..6204bd53528c 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -37,7 +37,7 @@ #define REG_A %ecx #define REG_B %esi #define REG_C %edi -#define REG_D %ebp +#define REG_D %r12d #define REG_E %edx #define REG_T1 %eax @@ -74,10 +74,10 @@ ENTRY(\name) push%rbx - push%rbp push%r12 + push%rbp + mov %rsp, %rbp - mov %rsp, %r12 sub $64, %rsp # allocate workspace and $~15, %rsp # align stack @@ -99,10 +99,9 @@ xor %rax, %rax rep stosq - mov %r12, %rsp # deallocate workspace - - pop %r12 + mov %rbp, %rsp # deallocate workspace pop %rbp + pop %r12 pop %rbx ret -- 2.13.5
[PATCH v2 08/12] x86/crypto: Fix RBP usage in sha256-avx-asm.S
Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Swap the usages of R12 and RBP. Use R12 for the TBL register, and use RBP to store the pre-aligned stack pointer. Reported-by: Eric BiggersReported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf --- arch/x86/crypto/sha256-avx-asm.S | 15 +++ 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index e0a1a5f2..001bbcf93c79 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -103,7 +103,7 @@ SRND = %rsi # clobbers INP c = %ecx d = %r8d e = %edx -TBL = %rbp +TBL = %r12 a = %eax b = %ebx @@ -350,13 +350,13 @@ a = TMP_ ENTRY(sha256_transform_avx) .align 32 pushq %rbx - pushq %rbp + pushq %r12 pushq %r13 pushq %r14 pushq %r15 - pushq %r12 + pushq %rbp + movq%rsp, %rbp - mov %rsp, %r12 subq$STACK_SIZE, %rsp # allocate stack space and $~15, %rsp # align stack pointer @@ -452,13 +452,12 @@ loop2: done_hash: - mov %r12, %rsp - - popq%r12 + mov %rbp, %rsp + popq%rbp popq%r15 popq%r14 popq%r13 - popq%rbp + popq%r12 popq%rbx ret ENDPROC(sha256_transform_avx) -- 2.13.5
[PATCH v2 12/12] x86/crypto: Fix RBP usage in twofish-avx-x86_64-asm_64.S
Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Use R13 instead of RBP. Both are callee-saved registers, so the substitution is straightforward. Reported-by: Eric BiggersReported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf --- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index b3f49d286348..73b471da3622 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -76,8 +76,8 @@ #define RT %xmm14 #define RR %xmm15 -#define RID1 %rbp -#define RID1d %ebp +#define RID1 %r13 +#define RID1d %r13d #define RID2 %rsi #define RID2d %esi @@ -259,7 +259,7 @@ __twofish_enc_blk8: vmovdqu w(CTX), RK1; - pushq %rbp; + pushq %r13; pushq %rbx; pushq %rcx; @@ -282,7 +282,7 @@ __twofish_enc_blk8: popq %rcx; popq %rbx; - popq %rbp; + popq %r13; outunpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2); @@ -301,7 +301,7 @@ __twofish_dec_blk8: vmovdqu (w+4*4)(CTX), RK1; - pushq %rbp; + pushq %r13; pushq %rbx; inpack_blocks(RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2); @@ -322,7 +322,7 @@ __twofish_dec_blk8: vmovdqu (w)(CTX), RK1; popq %rbx; - popq %rbp; + popq %r13; outunpack_blocks(RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2); outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2); -- 2.13.5
[PATCH v2 11/12] x86/crypto: Fix RBP usage in sha512-avx2-asm.S
Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code. Mix things up a little bit to get rid of the RBP usage, without hurting performance too much. Use RDI instead of RBP for the TBL pointer. That will clobber CTX, so spill CTX onto the stack and use R12 to read it in the outer loop. R12 is used as a non-persistent temporary variable elsewhere, so it's safe to use. Also remove the unused y4 variable. Reported-by: Eric BiggersReported-by: Peter Zijlstra Tested-by: Eric Biggers Acked-by: Eric Biggers Signed-off-by: Josh Poimboeuf --- arch/x86/crypto/sha512-avx2-asm.S | 75 --- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 7f5f6c6ec72e..b16d56005162 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -69,8 +69,9 @@ XFER = YTMP0 BYTE_FLIP_MASK = %ymm9 -# 1st arg -CTX = %rdi +# 1st arg is %rdi, which is saved to the stack and accessed later via %r12 +CTX1= %rdi +CTX2= %r12 # 2nd arg INP = %rsi # 3rd arg @@ -81,7 +82,7 @@ d = %r8 e = %rdx y3 = %rsi -TBL = %rbp +TBL = %rdi # clobbers CTX1 a = %rax b = %rbx @@ -91,26 +92,26 @@ g = %r10 h = %r11 old_h = %r11 -T1= %r12 +T1= %r12 # clobbers CTX2 y0= %r13 y1= %r14 y2= %r15 -y4= %r12 - # Local variables (stack frame) XFER_SIZE = 4*8 SRND_SIZE = 1*8 INP_SIZE = 1*8 INPEND_SIZE = 1*8 +CTX_SIZE = 1*8 RSPSAVE_SIZE = 1*8 -GPRSAVE_SIZE = 6*8 +GPRSAVE_SIZE = 5*8 frame_XFER = 0 frame_SRND = frame_XFER + XFER_SIZE frame_INP = frame_SRND + SRND_SIZE frame_INPEND = frame_INP + INP_SIZE -frame_RSPSAVE = frame_INPEND + INPEND_SIZE +frame_CTX = frame_INPEND + INPEND_SIZE +frame_RSPSAVE = frame_CTX + CTX_SIZE frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE frame_size = frame_GPRSAVE + GPRSAVE_SIZE @@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx) mov %rax, frame_RSPSAVE(%rsp) # Save GPRs - mov %rbp, frame_GPRSAVE(%rsp) - mov %rbx, 8*1+frame_GPRSAVE(%rsp) - mov %r12, 8*2+frame_GPRSAVE(%rsp) - mov %r13, 8*3+frame_GPRSAVE(%rsp) - mov %r14, 8*4+frame_GPRSAVE(%rsp) - mov %r15, 8*5+frame_GPRSAVE(%rsp) + mov %rbx, 8*0+frame_GPRSAVE(%rsp) + mov %r12, 8*1+frame_GPRSAVE(%rsp) + mov %r13, 8*2+frame_GPRSAVE(%rsp) + mov %r14, 8*3+frame_GPRSAVE(%rsp) + mov %r15, 8*4+frame_GPRSAVE(%rsp) shl $7, NUM_BLKS# convert to bytes jz done_hash @@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx) mov NUM_BLKS, frame_INPEND(%rsp) ## load initial digest - mov 8*0(CTX),a - mov 8*1(CTX),b - mov 8*2(CTX),c - mov 8*3(CTX),d - mov 8*4(CTX),e - mov 8*5(CTX),f - mov 8*6(CTX),g - mov 8*7(CTX),h + mov 8*0(CTX1), a + mov 8*1(CTX1), b + mov 8*2(CTX1), c + mov 8*3(CTX1), d + mov 8*4(CTX1), e + mov 8*5(CTX1), f + mov 8*6(CTX1), g + mov 8*7(CTX1), h + + # save %rdi (CTX) before it gets clobbered + mov %rdi, frame_CTX(%rsp) vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK @@ -652,14 +655,15 @@ loop2: subq$1, frame_SRND(%rsp) jne loop2 - addm8*0(CTX),a - addm8*1(CTX),b - addm8*2(CTX),c - addm8*3(CTX),d - addm8*4(CTX),e - addm8*5(CTX),f - addm8*6(CTX),g - addm8*7(CTX),h + mov frame_CTX(%rsp), CTX2 + addm8*0(CTX2), a + addm8*1(CTX2), b + addm8*2(CTX2), c + addm8*3(CTX2), d + addm8*4(CTX2), e + addm8*5(CTX2), f + addm8*6(CTX2), g + addm8*7(CTX2), h mov frame_INP(%rsp), INP add $128, INP @@ -669,12 +673,11 @@ loop2: done_hash: # Restore GPRs - mov frame_GPRSAVE(%rsp) ,%rbp - mov 8*1+frame_GPRSAVE(%rsp) ,%rbx - mov 8*2+frame_GPRSAVE(%rsp) ,%r12 - mov 8*3+frame_GPRSAVE(%rsp) ,%r13 - mov 8*4+frame_GPRSAVE(%rsp) ,%r14 - mov 8*5+frame_GPRSAVE(%rsp) ,%r15 + mov 8*0+frame_GPRSAVE(%rsp), %rbx + mov 8*1+frame_GPRSAVE(%rsp), %r12 + mov 8*2+frame_GPRSAVE(%rsp), %r13 + mov 8*3+frame_GPRSAVE(%rsp), %r14 + mov 8*4+frame_GPRSAVE(%rsp), %r15 # Restore Stack Pointer mov frame_RSPSAVE(%rsp), %rsp -- 2.13.5
[PATCH v2 00/12] x86/crypto: Fix RBP usage in several crypto .S files
v2: - fix performance issues in sha256-avx2-asm.S and sha512-avx2-asm.S (Eric) Many of the x86 crypto functions use RBP as a temporary register. This breaks frame pointer convention, and breaks stack traces when unwinding from an interrupt in the crypto code. Convert most* of them to leave RBP alone. These pass the crypto boot tests for me. Any further testing would be appreciated! [*] There are still a few crypto files left that need fixing, but the fixes weren't trivial and nobody reported unwinder warnings about them yet, so I'm skipping them for now. Josh Poimboeuf (12): x86/crypto: Fix RBP usage in blowfish-x86_64-asm_64.S x86/crypto: Fix RBP usage in camellia-x86_64-asm_64.S x86/crypto: Fix RBP usage in cast5-avx-x86_64-asm_64.S x86/crypto: Fix RBP usage in cast6-avx-x86_64-asm_64.S x86/crypto: Fix RBP usage in des3_ede-asm_64.S x86/crypto: Fix RBP usage in sha1_avx2_x86_64_asm.S x86/crypto: Fix RBP usage in sha1_ssse3_asm.S x86/crypto: Fix RBP usage in sha256-avx-asm.S x86/crypto: Fix RBP usage in sha256-avx2-asm.S x86/crypto: Fix RBP usage in sha256-ssse3-asm.S x86/crypto: Fix RBP usage in sha512-avx2-asm.S x86/crypto: Fix RBP usage in twofish-avx-x86_64-asm_64.S arch/x86/crypto/blowfish-x86_64-asm_64.S| 48 +- arch/x86/crypto/camellia-x86_64-asm_64.S| 26 +- arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 47 +++--- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 50 +-- arch/x86/crypto/des3_ede-asm_64.S | 15 +++--- arch/x86/crypto/sha1_avx2_x86_64_asm.S | 4 +- arch/x86/crypto/sha1_ssse3_asm.S| 11 ++--- arch/x86/crypto/sha256-avx-asm.S| 15 +++--- arch/x86/crypto/sha256-avx2-asm.S | 22 +++-- arch/x86/crypto/sha256-ssse3-asm.S | 15 +++--- arch/x86/crypto/sha512-avx2-asm.S | 75 +++-- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 12 ++--- 12 files changed, 184 insertions(+), 156 deletions(-) -- 2.13.5
[PATCH v3 3/4] crypto: jz4780-rng: Add RNG node to jz4780.dtsi
Add RNG node to jz4780 dtsi. This driver uses registers that are part of the register set used by Ingenic CGU driver. Use regmap in RNG driver to access its register. Create 'simple-bus' node, make CGU and RNG node as child of it so that both the nodes are visible without changing CGU driver code. Signed-off-by: PrasannaKumar Muralidharan--- Changes in v3: * Create a cgublock node with "simple-bus" compatible * Make CGU and RNG node as children of cgublock node. Changes in v2: * Add "syscon" in CGU node's compatible section * Make RNG child node of CGU. arch/mips/boot/dts/ingenic/jz4780.dtsi | 25 - 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index 4853ef6..5953b97 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -34,14 +34,29 @@ clock-frequency = <32768>; }; - cgu: jz4780-cgu@1000 { - compatible = "ingenic,jz4780-cgu"; + cgublock { + compatible = "simple-bus"; + + #address-cells = <1>; + #size-cells = <1>; + reg = <0x1000 0x100>; + ranges; - clocks = <>, <>; - clock-names = "ext", "rtc"; + cgu: jz4780-cgu@0 { + compatible = "ingenic,jz4780-cgu"; + reg = <0x1000 0x100>; - #clock-cells = <1>; + clocks = <>, <>; + clock-names = "ext", "rtc"; + + #clock-cells = <1>; + }; + + rng: rng@d8 { + compatible = "ingenic,jz4780-rng"; + reg = <0x10d8 0x8>; + }; }; pinctrl: pin-controller@1001 { -- 2.10.0
[PATCH v3 4/4] crypto: jz4780-rng: Enable PRNG support in CI20 defconfig
Enable PRNG driver support in MIPS Creator CI20 default config. Signed-off-by: PrasannaKumar Muralidharan--- No changes in v3 No changes in v2 arch/mips/configs/ci20_defconfig | 5 + 1 file changed, 5 insertions(+) diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig index b42cfa7..9f48f2c 100644 --- a/arch/mips/configs/ci20_defconfig +++ b/arch/mips/configs/ci20_defconfig @@ -88,6 +88,11 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=5 CONFIG_SERIAL_8250_INGENIC=y CONFIG_SERIAL_OF_PLATFORM=y # CONFIG_HW_RANDOM is not set +CONFIG_CRYPTO_USER=y +CONFIG_CRYPTO_USER_API=y +CONFIG_CRYPTO_USER_API_RNG=y +CONFIG_CRYPTO_HW=y +CONFIG_CRYPTO_DEV_JZ4780_RNG=y CONFIG_I2C=y CONFIG_I2C_JZ4780=y CONFIG_GPIO_SYSFS=y -- 2.10.0
[PATCH v3 0/4] crypto: Add driver for JZ4780 PRNG
This patch series adds support of pseudo random number generator found in Ingenic's JZ4780 and X1000 SoC. Create cgublock node which has CGU and RNG node as its children. The cgublock node uses "simple-bus" compatible which helps in exposing CGU and RNG nodes without changing CGU driver. Add 'syscon' compatible in CGU node in jz4780.dtsi. The jz4780-rng driver uses regmap exposed via syscon interface to access the RNG registers. CGU driver is not modified in this patch set as registers used by CGU driver and this driver are different. PrasannaKumar Muralidharan (4): crypto: jz4780-rng: Add JZ4780 PRNG devicetree binding documentation crypto: jz4780-rng: Add Ingenic JZ4780 hardware PRNG driver crypto: jz4780-rng: Add RNG node to jz4780.dtsi crypto: jz4780-rng: Enable PRNG support in CI20 defconfig .../devicetree/bindings/rng/ingenic,jz4780-rng.txt | 21 +++ MAINTAINERS| 7 + arch/mips/boot/dts/ingenic/jz4780.dtsi | 25 ++- arch/mips/configs/ci20_defconfig | 5 + drivers/crypto/Kconfig | 19 ++ drivers/crypto/Makefile| 1 + drivers/crypto/jz4780-rng.c| 193 + 7 files changed, 266 insertions(+), 5 deletions(-) create mode 100644 Documentation/devicetree/bindings/rng/ingenic,jz4780-rng.txt create mode 100644 drivers/crypto/jz4780-rng.c -- 2.10.0
[PATCH v3 2/4] crypto: jz4780-rng: Add Ingenic JZ4780 hardware PRNG driver
JZ4780 SoC pseudo random number generator driver using crypto framework. Adding a delay before reading RNG data and disabling RNG after reading data was suggested by Jeffery Walton. Tested-by: Mathieu MalaterreSuggested-by: Jeffrey Walton Signed-off-by: PrasannaKumar Muralidharan --- Changes in v3: * Add seeding support * Reduce delay Changes in v2: * Fixed buffer overflow in generate function pointed out in Stephan's review * Fold patch that had only MAINTAINERS file change with this patch * Removed unnecessary comment in code MAINTAINERS | 7 ++ drivers/crypto/Kconfig | 19 + drivers/crypto/Makefile | 1 + drivers/crypto/jz4780-rng.c | 193 4 files changed, 220 insertions(+) create mode 100644 drivers/crypto/jz4780-rng.c diff --git a/MAINTAINERS b/MAINTAINERS index 2093060..d2341a7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6783,6 +6783,13 @@ L: linux-...@lists.infradead.org S: Maintained F: drivers/mtd/nand/jz4780_* +INGENIC JZ4780 PRNG DRIVER +M: PrasannaKumar Muralidharan +L: linux-crypto@vger.kernel.org +S: Maintained +F: drivers/crypto/jz4780-rng.c +F: Documentation/devicetree/bindings/rng/ingenic,jz4780-rng.txt + INOTIFY M: Jan Kara R: Amir Goldstein diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index fe33c19..f3ac1cd 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -613,6 +613,25 @@ config CRYPTO_DEV_IMGTEC_HASH hardware hash accelerator. Supporting MD5/SHA1/SHA224/SHA256 hashing algorithms. +config CRYPTO_DEV_JZ4780_RNG + tristate "JZ4780 HW pseudo random number generator support" + depends on MACH_JZ4780 || COMPILE_TEST + depends on HAS_IOMEM + select CRYPTO_RNG + select REGMAP + select SYSCON + select MFD_SYSCON + ---help--- + This driver provides kernel-side support through the + cryptographic API for the pseudo random number generator + hardware found in ingenic JZ4780 and X1000 SoC. MIPS + Creator CI20 uses JZ4780 SoC. + + To compile this driver as a module, choose M here: the + module will be called jz4780-rng. + + If unsure, say Y. + config CRYPTO_DEV_SUN4I_SS tristate "Support for Allwinner Security System cryptographic accelerator" depends on ARCH_SUNXI && !64BIT diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 808432b..a09d9f4 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_CRYPTO_DEV_GEODE) += geode-aes.o obj-$(CONFIG_CRYPTO_DEV_HIFN_795X) += hifn_795x.o obj-$(CONFIG_CRYPTO_DEV_IMGTEC_HASH) += img-hash.o obj-$(CONFIG_CRYPTO_DEV_IXP4XX) += ixp4xx_crypto.o +obj-$(CONFIG_CRYPTO_DEV_JZ4780_RNG) += jz4780-rng.o obj-$(CONFIG_CRYPTO_DEV_MV_CESA) += mv_cesa.o obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell/ obj-$(CONFIG_CRYPTO_DEV_MEDIATEK) += mediatek/ diff --git a/drivers/crypto/jz4780-rng.c b/drivers/crypto/jz4780-rng.c new file mode 100644 index 000..918ba94 --- /dev/null +++ b/drivers/crypto/jz4780-rng.c @@ -0,0 +1,193 @@ +/* + * jz4780-rng.c - Random Number Generator driver for the jz4780 + * + * Copyright (c) 2017 PrasannaKumar Muralidharan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#define REG_RNG_CTRL 0xD8 +#define REG_RNG_DATA 0xDC + +/* Context for crypto */ +struct jz4780_rng_ctx { + struct jz4780_rng *rng; +}; + +/* Device associated memory */ +struct jz4780_rng { + struct device *dev; + struct regmap *regmap; + u32 seed; +}; + +static struct jz4780_rng *jz4780_rng; + +static int jz4780_rng_readl(struct jz4780_rng *rng, u32 offset) +{ + u32 val = 0; + int ret; + + ret = regmap_read(rng->regmap, offset, ); + if (!ret) + return val; + + return ret; +} + +static int jz4780_rng_writel(struct jz4780_rng *rng, u32 val, u32 offset) +{ + return regmap_write(rng->regmap, offset, val); +} + +static int jz4780_rng_seed(struct crypto_rng *tfm, const u8 *seed, + unsigned int slen) +{ + struct
[PATCH v3 1/4] crypto: jz4780-rng: Add JZ4780 PRNG devicetree binding documentation
Add devicetree bindings for hardware pseudo random number generator present in Ingenic JZ4780 SoC. Signed-off-by: PrasannaKumar Muralidharan--- Changes in v3: * Create a cgublock node with "simple-bus" compatible * Make CGU and RNG node as children of cgublock node. Changes in v2: * Add "syscon" in CGU node's compatible section * Make RNG child node of CGU. .../devicetree/bindings/rng/ingenic,jz4780-rng.txt | 21 + 1 file changed, 21 insertions(+) create mode 100644 Documentation/devicetree/bindings/rng/ingenic,jz4780-rng.txt diff --git a/Documentation/devicetree/bindings/rng/ingenic,jz4780-rng.txt b/Documentation/devicetree/bindings/rng/ingenic,jz4780-rng.txt new file mode 100644 index 000..765df9c --- /dev/null +++ b/Documentation/devicetree/bindings/rng/ingenic,jz4780-rng.txt @@ -0,0 +1,21 @@ +Ingenic jz4780 RNG driver + +Required properties: +- compatible : Should be "ingenic,jz4780-rng" + +Example: + +cgublock { + compatible = "simple-bus"; + + #address-cells = <1>; + #size-cells = <1>; + + reg = <0x1000 0x100>; + ranges; + + rng: rng@d8 { + compatible = "ingenic,jz4780-rng"; + reg = <0x10d8 0x8>; + }; +}; -- 2.10.0
Re: [PATCH v3 0/3] STM32 CRYP crypto driver
Just a gentle ping ... or have I missed out on a reply? On 18/08/17 11:19, Fabien Dessenne wrote: > This set of patches adds a new crypto driver for STMicroelectronics stm32 HW. > This drivers uses the crypto API and provides with HW-enabled AEAD and block > cipher algorithms. > It makes use of the crypto engine which is upgraded in order to support AEAD > requests. > > This driver was successfully tested with tcrypt / testmgr. > > Changes since v3: > - update dt-bindings with Rob Herring remarks > > Changes since v2: > - update dt-bindings (interrupts description) > - rebase on STM32 crypto patches (L. Debieve : update CRC32 + add HASH) > > Fabien Dessenne (3): >crypto: engine - permit to enqueue aead_request >dt-bindings: Document STM32 CRYP bindings >crypto: stm32 - Support for STM32 CRYP crypto module > > .../devicetree/bindings/crypto/st,stm32-cryp.txt | 19 + > crypto/crypto_engine.c | 101 + > drivers/crypto/stm32/Kconfig |9 + > drivers/crypto/stm32/Makefile |3 +- > drivers/crypto/stm32/stm32-cryp.c | 1962 > > include/crypto/engine.h| 16 + > 6 files changed, 2109 insertions(+), 1 deletion(-) > create mode 100644 > Documentation/devicetree/bindings/crypto/st,stm32-cryp.txt > create mode 100644 drivers/crypto/stm32/stm32-cryp.c >
Re: [PATCH 1/2] crypto: stm32 - Fix uninitialized data usage
Hi Arnd, I've already push this fix for review last month, waiting the ack. " From: Lionel DebieveTo: Herbert Xu , "David S . Miller" , Maxime Coquelin , Alexandre Torgue , , , CC: Benjamin Gaignard , Fabien Dessenne , Ludovic Barre Subject: [PATCH 1/1] crypto: stm32/hash - Remove uninitialized symbol Date: Fri, 18 Aug 2017 15:54:01 +0200 " Sorry if you receive this mail twice, I didn't see any mail in the mailing list, maybe server issue. I'm reviewing your second part patch. BR, Lionel > On 09/12/2017 11:35 AM, Arnd Bergmann wrote: >> The error handling in stm32_hash_irq_thread passes >> uninitialized data into stm32_hash_finish_req, as gcc >> points out: >> drivers/crypto/stm32/stm32-hash.c: In function 'stm32_hash_irq_thread': >> drivers/crypto/stm32/stm32-hash.c:1088:2: error: 'err' may be used >> uninitialized in this function [-Werror=maybe-uninitialized] >> I could not tell what data should be passed there instead, >> so this changes the code to always pass zero, making it >> well-defined, though possibly still wrong. Please check. >> Signed-off-by: Arnd Bergmann >> --- >>drivers/crypto/stm32/stm32-hash.c | 3 +-- >>1 file changed, 1 insertion(+), 2 deletions(-) >> diff --git a/drivers/crypto/stm32/stm32-hash.c >> b/drivers/crypto/stm32/stm32-hash.c >> index b585ce54a802..3c23a23e9ee5 100644 >> --- a/drivers/crypto/stm32/stm32-hash.c >> +++ b/drivers/crypto/stm32/stm32-hash.c >> @@ -1067,7 +1067,6 @@ static int stm32_hash_cra_sha256_init(struct >> crypto_tfm *tfm) >>static irqreturn_t stm32_hash_irq_thread(int irq, void *dev_id) >>{ >>struct stm32_hash_dev *hdev = dev_id; >> -int err; >> >>if (HASH_FLAGS_CPU & hdev->flags) { >>if (HASH_FLAGS_OUTPUT_READY & hdev->flags) { >> @@ -1085,7 +1084,7 @@ static irqreturn_t stm32_hash_irq_thread(int irq, void >> *dev_id) >> >>finish: >>/*Finish current request */ >> -stm32_hash_finish_req(hdev->req, err); >> +stm32_hash_finish_req(hdev->req, 0); >> >>return IRQ_HANDLED; >>} >