[PATCH v3] crypto: ecc: Remove stack VLA usage

2018-03-26 Thread Kees Cook
On the quest to remove all VLAs from the kernel[1], this avoids VLAs
by just using the maximum allocation size (4 u64 digits) for stack
arrays. All the VLAs in ecc were either 3 or 4 digits (or a multiple
of that), so just make them ECC_MAX_DIGITS (4) all the time.
Initialization routines are adjusted to check that ndigits does not
end up larger than the arrays.

[1] https://lkml.org/lkml/2018/3/7/621

Signed-off-by: Kees Cook 
---
This expects 14de52112ee70ca289fa77bf2d9cbc79fd2c811f to be reverted.
---
 crypto/ecc.c  | 47 ++++++++++++++++++++++++-----------------------
 crypto/ecc.h  |  4 +++-
 crypto/ecdh.c |  4 ++--
 3 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/crypto/ecc.c b/crypto/ecc.c
index 18f32f2a5e1c..815541309a95 100644
--- a/crypto/ecc.c
+++ b/crypto/ecc.c
@@ -515,7 +515,7 @@ static void vli_mmod_fast_256(u64 *result, const u64 
*product,
 static bool vli_mmod_fast(u64 *result, u64 *product,
  const u64 *curve_prime, unsigned int ndigits)
 {
-   u64 tmp[2 * ndigits];
+   u64 tmp[2 * ECC_MAX_DIGITS];
 
switch (ndigits) {
case 3:
@@ -536,7 +536,7 @@ static bool vli_mmod_fast(u64 *result, u64 *product,
 static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
  const u64 *curve_prime, unsigned int ndigits)
 {
-   u64 product[2 * ndigits];
+   u64 product[2 * ECC_MAX_DIGITS];
 
vli_mult(product, left, right, ndigits);
vli_mmod_fast(result, product, curve_prime, ndigits);
@@ -546,7 +546,7 @@ static void vli_mod_mult_fast(u64 *result, const u64 *left, 
const u64 *right,
 static void vli_mod_square_fast(u64 *result, const u64 *left,
const u64 *curve_prime, unsigned int ndigits)
 {
-   u64 product[2 * ndigits];
+   u64 product[2 * ECC_MAX_DIGITS];
 
vli_square(product, left, ndigits);
vli_mmod_fast(result, product, curve_prime, ndigits);
@@ -560,8 +560,8 @@ static void vli_mod_square_fast(u64 *result, const u64 
*left,
 static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
unsigned int ndigits)
 {
-   u64 a[ndigits], b[ndigits];
-   u64 u[ndigits], v[ndigits];
+   u64 a[ECC_MAX_DIGITS], b[ECC_MAX_DIGITS];
+   u64 u[ECC_MAX_DIGITS], v[ECC_MAX_DIGITS];
u64 carry;
int cmp_result;
 
@@ -649,8 +649,8 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 
*z1,
  u64 *curve_prime, unsigned int ndigits)
 {
/* t1 = x, t2 = y, t3 = z */
-   u64 t4[ndigits];
-   u64 t5[ndigits];
+   u64 t4[ECC_MAX_DIGITS];
+   u64 t5[ECC_MAX_DIGITS];
 
if (vli_is_zero(z1, ndigits))
return;
@@ -711,7 +711,7 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 
*z1,
 static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime,
unsigned int ndigits)
 {
-   u64 t1[ndigits];
+   u64 t1[ECC_MAX_DIGITS];
 
vli_mod_square_fast(t1, z, curve_prime, ndigits);/* z^2 */
vli_mod_mult_fast(x1, x1, t1, curve_prime, ndigits); /* x1 * z^2 */
@@ -724,7 +724,7 @@ static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, 
u64 *y2,
u64 *p_initial_z, u64 *curve_prime,
unsigned int ndigits)
 {
-   u64 z[ndigits];
+   u64 z[ECC_MAX_DIGITS];
 
vli_set(x2, x1, ndigits);
vli_set(y2, y1, ndigits);
@@ -750,7 +750,7 @@ static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, 
u64 *curve_prime,
 unsigned int ndigits)
 {
/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
-   u64 t5[ndigits];
+   u64 t5[ECC_MAX_DIGITS];
 
/* t5 = x2 - x1 */
vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
@@ -791,9 +791,9 @@ static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, 
u64 *curve_prime,
   unsigned int ndigits)
 {
/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
-   u64 t5[ndigits];
-   u64 t6[ndigits];
-   u64 t7[ndigits];
+   u64 t5[ECC_MAX_DIGITS];
+   u64 t6[ECC_MAX_DIGITS];
+   u64 t7[ECC_MAX_DIGITS];
 
/* t5 = x2 - x1 */
vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
@@ -846,9 +846,9 @@ static void ecc_point_mult(struct ecc_point *result,
   unsigned int ndigits)
 {
/* R0 and R1 */
-   u64 rx[2][ndigits];
-   u64 ry[2][ndigits];
-   u64 z[ndigits];
+   u64 rx[2][ECC_MAX_DIGITS];
+   u64 ry[2][ECC_MAX_DIGITS];
+   u64 z[ECC_MAX_DIGITS];
int i, nb;
int num_bits = vli_num_bits(scalar, ndigits);
 
@@ -943,13 +943,13 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int 
ndigits,
 int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey)
 {
const struct ecc_curve *curve = ecc_get_curve(curve_id);
-   u64 priv[ndigits];
+   u64 priv[ECC_MAX_DIGITS];
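
For illustration, a minimal sketch of the ndigits bounds check the commit
message refers to; the helper name is hypothetical and this is not the exact
crypto/ecc.h or crypto/ecdh.c hunk:

/* Illustrative only: reject curves whose digit count would exceed the
 * fixed-size stack arrays. ECC_MAX_DIGITS is assumed to be 4, matching
 * the largest curve supported by crypto/ecc.c at the time.
 */
#ifndef ECC_MAX_DIGITS
#define ECC_MAX_DIGITS 4
#endif

static int ecc_check_ndigits(unsigned int ndigits)
{
	if (ndigits > ECC_MAX_DIGITS)
		return -EINVAL;
	return 0;
}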

Re: [PATCH v2] crypto/ecc: Remove stack VLA usage

2018-03-26 Thread Kees Cook
On Fri, Mar 16, 2018 at 8:56 AM, Herbert Xu  wrote:
> On Thu, Mar 08, 2018 at 01:57:02PM -0800, Kees Cook wrote:
>> On the quest to remove all VLAs from the kernel[1], this switches to
>> a pair of kmalloc regions instead of using the stack. This also moves
>> the get_random_bytes() after all allocations (and drops the needless
>> "nbytes" variable).
>>
>> [1] https://lkml.org/lkml/2018/3/7/621
>>
>> Signed-off-by: Kees Cook 
>
> Patch applied.  Thanks.

Hi, sorry for the noise on this one: I messed up looking at the ecc
code (I confused myself into thinking there was only a single instance
of the problem). The applied patch is both incomplete and inefficient.
I have a much simpler solution, and I'll send that with a revert...

-Kees

-- 
Kees Cook
Pixel Security


Re: [PATCH v13 01/10] iomap: Use correct endian conversion function in mmio_writeXXbe

2018-03-26 Thread Logan Gunthorpe


On 26/03/18 01:50 PM, Arnd Bergmann wrote:
> I wouldn't expect it to matter: the byte swap is almost always much
> cheaper compared to the actual bus access for the MMIO, and I
> would also guess that modern compilers can eliminate the double
> byte swap on architectures where writel() is an inline function. Most of
> the important architectures use ARCH_USE_BUILTIN_BSWAP, which
> guarantees that.

Fair enough. Sometime this week I'll update my patch set to change that.

Thanks,

Logan


Re: [PATCH v13 01/10] iomap: Use correct endian conversion function in mmio_writeXXbe

2018-03-26 Thread Arnd Bergmann
On Mon, Mar 26, 2018 at 6:21 PM, Logan Gunthorpe  wrote:
>
>
> On 26/03/18 04:53 AM, Arnd Bergmann wrote:
>> On most architectures, this is not important:
>> - For x86, the stores are always atomic and no additional barriers
>>   are needed, so the two are the same
>> - For ARM (both 32 and 64-bit), powerpc and many others, we don't
>>   use the generic iowrite() and just fall back to writel() or
>>   writel(swab32()).
>>
>> However, shouldn't we just use the writel(swab32()) logic here as well
>> for the common case rather than risking missing barriers?
>
> Hmm, I don't know... it's complicated?
>
> Doing a bit of digging shows that the existing code was written during a
> time when writel() did not include extra barriers over __raw_writel() in
> any of the common arches.
>
> The commit logs don't seem to provide any guidance as to why it was
> done this way, but I'd assume it was done to avoid a double swab() call
> on BE arches. Seeing that writel() is typically implemented as:
>
> __raw_writel(__cpu_to_le32(value), addr);
>
> Then on BE arches, writel(swab32()) would become:
>
> __raw_writel(swab32(swab32(value)), addr)
>
> Which seems undesirable.

I wouldn't expect it to matter: the byte swap is almost always much
cheaper compared to the actual bus access for the MMIO, and I
would also guess that modern compilers can eliminate the double
byte swap on architectures where writel() is an inline function. Most of
the important architectures use ARCH_USE_BUILTIN_BSWAP, which
guarantees that.

   Arnd


Re: [PATCH v13 01/10] iomap: Use correct endian conversion function in mmio_writeXXbe

2018-03-26 Thread Logan Gunthorpe


On 26/03/18 04:53 AM, Arnd Bergmann wrote:
> On most architectures, this is not important:
> - For x86, the stores are always atomic and no additional barriers
>   are needed, so the two are the same
> - For ARM (both 32 and 64-bit), powerpc and many others, we don't
>   use the generic iowrite() and just fall back to writel() or
>   writel(swab32()).
> 
> However, shouldn't we just use the writel(swab32()) logic here as well
> for the common case rather than risking missing barriers?

Hmm, I don't know... it's complicated?

Doing a bit of digging shows that the existing code was written during a
time when writel() did not include extra barriers over __raw_writel() in
any of the common arches.

The commit logs don't seem to provide any guidance as to why it was
done this way, but I'd assume it was done to avoid a double swab() call
on BE arches. Seeing that writel() is typically implemented as:

__raw_writel(__cpu_to_le32(value), addr);

Then on BE arches, writel(swab32()) would become:

__raw_writel(swab32(swab32(value)), addr)

Which seems undesirable.

Logan


Re: in-kernel user of ecdsa

2018-03-26 Thread Tudor Ambarus



On 03/12/2018 07:07 PM, Tudor Ambarus wrote:


Would you consider using ECDSA in the kernel module signing facility?


Any feedback is good. I can invest some time to make this happen, if
needed.


When compared with RSA, ECDSA has shorter keys, the key generation
process is faster, the sign operation is faster, but the verify
operation is slower than with RSA.

Smaller key sizes imply reduced memory footprint and bandwidth that are
especially attractive for memory constrained devices. I'm working with
such a device, capable of generating ecc keys, secure key storage and
ecdsa/ecdh crypto acceleration. I'm trying to find an in-kernel user of
ecdsa.


ECDSA and RSA comparison

-> ECDSA requires a much smaller key length in order to provide the same
security strength as RSA [1]:

Security Strength    RSA (bits)    ECDSA (bits)
112                  2048          224 - 255
128                  3072          256 - 383
192                  7680          384 - 511
256                  15360         512+

7680- and 15360-bit keys are not included in the NIST standards for
interoperability and efficiency reasons; the keys are just too big.

-> key generation: ECC key generation is faster than IFC (Integer
Factorization Cryptography). An RSA private key is based on large prime
numbers, while for ECDSA any positive integer less than n is a valid
private key.

-> ECDSA sign operations are faster than RSA, but verify operations are
slower. Here's an openssl speed test that I've run on my computer:

                   sign     verify     sign/s  verify/s
rsa 2048 bits  0.000604s  0.000018s    1656.3   56813.7
rsa 4096 bits  0.004027s  0.000062s     248.3   16052.5

                               sign    verify    sign/s  verify/s
256 bit ecdsa (nistp256)    0.0000s   0.0001s   28986.4   13516.3
384 bit ecdsa (nistp384)    0.0002s   0.0008s    5541.0    1322.2
521 bit ecdsa (nistp521)    0.0003s   0.0006s    3104.2    1756.2

Best,
ta

[1] NIST SP 800-57 Pt. 1 Rev. 4, Recommendation for key management



Re: [PATCH v6 04/12] ima: Introduce is_ima_sig()

2018-03-26 Thread Mimi Zohar
On Fri, 2018-03-16 at 17:38 -0300, Thiago Jung Bauermann wrote:
> With the introduction of another IMA signature type (modsig), some places
> will need to check for both of them. It is cleaner to do that if there's a
> helper function to tell whether an xattr_value represents an IMA
> signature.

Initially the function name "is_ima_sig" is fine, since it reflects
the 'imasig' type.  Having a more generic function name would be
better when adding 'modsig' support.  As long as the function is
locally defined, we can drop 'ima' from the name.  Perhaps something
like has_signature() or is_signed() would be preferable.

Mimi


> 
> Suggested-by: Mimi Zohar 
> Signed-off-by: Thiago Jung Bauermann 
> ---
>  security/integrity/ima/ima.h  | 5 +
>  security/integrity/ima/ima_appraise.c | 7 +++
>  security/integrity/ima/ima_template_lib.c | 2 +-
>  3 files changed, 9 insertions(+), 5 deletions(-)
> 
> diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
> index 35fe91aa1fc9..4bafa6a97967 100644
> --- a/security/integrity/ima/ima.h
> +++ b/security/integrity/ima/ima.h
> @@ -155,6 +155,11 @@ unsigned long ima_get_binary_runtime_size(void);
>  int ima_init_template(void);
>  void ima_init_template_list(void);
> 
> +static inline bool is_ima_sig(const struct evm_ima_xattr_data *xattr_value)
> +{
> + return xattr_value && xattr_value->type == EVM_IMA_XATTR_DIGSIG;
> +}
> +
>  /*
>   * used to protect h_table and sha_table
>   */
> diff --git a/security/integrity/ima/ima_appraise.c 
> b/security/integrity/ima/ima_appraise.c
> index a6b2995b7d0b..01172eab297b 100644
> --- a/security/integrity/ima/ima_appraise.c
> +++ b/security/integrity/ima/ima_appraise.c
> @@ -325,15 +325,14 @@ int ima_appraise_measurement(enum ima_hooks func,
>   } else if (status != INTEGRITY_PASS) {
>   /* Fix mode, but don't replace file signatures. */
>   if ((ima_appraise & IMA_APPRAISE_FIX) &&
> - (!xattr_value ||
> -  xattr_value->type != EVM_IMA_XATTR_DIGSIG)) {
> + !is_ima_sig(xattr_value)) {
>   if (!ima_fix_xattr(dentry, iint))
>   status = INTEGRITY_PASS;
>   }
> 
>   /* Permit new files with file signatures, but without data. */
>   if (inode->i_size == 0 && iint->flags & IMA_NEW_FILE &&
> - xattr_value && xattr_value->type == EVM_IMA_XATTR_DIGSIG) {
> + is_ima_sig(xattr_value)) {
>   status = INTEGRITY_PASS;
>   }
> 
> @@ -448,7 +447,7 @@ int ima_inode_setxattr(struct dentry *dentry, const char 
> *xattr_name,
>   if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST))
>   return -EINVAL;
>   ima_reset_appraise_flags(d_backing_inode(dentry),
> - xvalue->type == EVM_IMA_XATTR_DIGSIG);
> +  is_ima_sig(xvalue));
>   result = 0;
>   }
>   return result;
> diff --git a/security/integrity/ima/ima_template_lib.c 
> b/security/integrity/ima/ima_template_lib.c
> index 5afaa53decc5..afb52a90e532 100644
> --- a/security/integrity/ima/ima_template_lib.c
> +++ b/security/integrity/ima/ima_template_lib.c
> @@ -380,7 +380,7 @@ int ima_eventsig_init(struct ima_event_data *event_data,
>  {
>   struct evm_ima_xattr_data *xattr_value = event_data->xattr_value;
> 
> - if ((!xattr_value) || (xattr_value->type != EVM_IMA_XATTR_DIGSIG))
> + if (!is_ima_sig(xattr_value))
>   return 0;
> 
>   return ima_write_template_field_data(xattr_value, event_data->xattr_len,
> 



Re: [PATCH v6 11/12] ima: Implement support for module-style appended signatures

2018-03-26 Thread Mimi Zohar
On Fri, 2018-03-16 at 17:38 -0300, Thiago Jung Bauermann wrote:
> This patch actually implements the appraise_type=imasig|modsig option,
> allowing IMA to read and verify modsig signatures.
> 
> In case both are present in the same file, IMA will first check whether the
> key used by the xattr signature is present in the kernel keyring. If not,
> it will try the appended signature.

Yes, this sounds right.

> 
> Signed-off-by: Thiago Jung Bauermann 
> ---
>  security/integrity/ima/ima.h  | 11 +++-
>  security/integrity/ima/ima_appraise.c | 53 
> +++
>  security/integrity/ima/ima_main.c | 21 +++---
>  3 files changed, 74 insertions(+), 11 deletions(-)
> 
> diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h
> index 49aef56dc96d..c11ccb7c5bfb 100644
> --- a/security/integrity/ima/ima.h
> +++ b/security/integrity/ima/ima.h
> @@ -157,7 +157,8 @@ void ima_init_template_list(void);
> 
>  static inline bool is_ima_sig(const struct evm_ima_xattr_data *xattr_value)
>  {
> - return xattr_value && xattr_value->type == EVM_IMA_XATTR_DIGSIG;
> + return xattr_value && (xattr_value->type == EVM_IMA_XATTR_DIGSIG ||
> +xattr_value->type == IMA_MODSIG);
>  }
> 
>  /*
> @@ -253,6 +254,8 @@ enum integrity_status ima_get_cache_status(struct 
> integrity_iint_cache *iint,
>  enum ima_hooks func);
>  enum hash_algo ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value,
>int xattr_len);
> +bool ima_xattr_sig_known_key(const struct evm_ima_xattr_data *xattr_value,
> +  int xattr_len);
>  int ima_read_xattr(struct dentry *dentry,
>  struct evm_ima_xattr_data **xattr_value);
> 
> @@ -291,6 +294,12 @@ ima_get_hash_algo(struct evm_ima_xattr_data 
> *xattr_value, int xattr_len)
>   return ima_hash_algo;
>  }
> 
> +static inline bool ima_xattr_sig_known_key(const struct evm_ima_xattr_data
> +*xattr_value, int xattr_len)
> +{
> + return false;
> +}
> +
>  static inline int ima_read_xattr(struct dentry *dentry,
>struct evm_ima_xattr_data **xattr_value)
>  {
> diff --git a/security/integrity/ima/ima_appraise.c 
> b/security/integrity/ima/ima_appraise.c
> index 01172eab297b..84e0fd5a19c8 100644
> --- a/security/integrity/ima/ima_appraise.c
> +++ b/security/integrity/ima/ima_appraise.c
> @@ -189,6 +189,22 @@ enum hash_algo ima_get_hash_algo(struct 
> evm_ima_xattr_data *xattr_value,
>   return ima_hash_algo;
>  }
> 
> +bool ima_xattr_sig_known_key(const struct evm_ima_xattr_data *xattr_value,
> +  int xattr_len)
> +{
> + struct key *keyring;
> +
> + if (xattr_value->type != EVM_IMA_XATTR_DIGSIG)
> + return false;
> +
> + keyring = integrity_keyring_from_id(INTEGRITY_KEYRING_IMA);
> + if (IS_ERR(keyring))
> + return false;
> +
> + return asymmetric_sig_has_known_key(keyring, (const char *) xattr_value,
> + xattr_len);
> +}
> +
>  int ima_read_xattr(struct dentry *dentry,
>  struct evm_ima_xattr_data **xattr_value)
>  {
> @@ -221,8 +237,12 @@ int ima_appraise_measurement(enum ima_hooks func,
>   struct inode *inode = d_backing_inode(dentry);
>   enum integrity_status status = INTEGRITY_UNKNOWN;
>   int rc = xattr_len, hash_start = 0;
> + size_t xattr_contents_len;
> + void *xattr_contents;
> 
> - if (!(inode->i_opflags & IOP_XATTR))
> + /* If not appraising a modsig, we need an xattr. */
> + if ((xattr_value == NULL || xattr_value->type != IMA_MODSIG) &&
> + !(inode->i_opflags & IOP_XATTR))
>   return INTEGRITY_UNKNOWN;
> 
>   if (rc <= 0) {
> @@ -241,13 +261,29 @@ int ima_appraise_measurement(enum ima_hooks func,
>   goto out;
>   }
> 
> - status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint);
> + /*
> +  * If it's a modsig, we don't have the xattr contents to pass to
> +  * evm_verifyxattr().
> +  */
> + if (xattr_value->type == IMA_MODSIG) {
> + xattr_contents = NULL;
> + xattr_contents_len = 0;
> + } else {
> + xattr_contents = xattr_value;
> + xattr_contents_len = xattr_len;
> + }
> +
> + status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_contents,
> +  xattr_contents_len, iint);
>   switch (status) {
>   case INTEGRITY_PASS:
>   case INTEGRITY_PASS_IMMUTABLE:
>   case INTEGRITY_UNKNOWN:
>   break;
>   case INTEGRITY_NOXATTRS:/* No EVM protected xattrs. */
> + /* It's fine not to have xattrs when using a modsig. */
> + if (xattr_value->type == IMA_MODSIG)
> + break;
>   case INTEGRITY_NOLABEL: 
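
For reference, a hedged sketch of the key-selection order described in the
commit message above: prefer the xattr signature when its signing key is on
the IMA keyring, otherwise fall back to the appended signature. Apart from
ima_xattr_sig_known_key() and EVM_IMA_XATTR_DIGSIG, which the patch itself
uses, the names here are illustrative and this is not the actual ima_main.c
hunk:

static bool ima_prefer_xattr_sig(const struct evm_ima_xattr_data *xattr_value,
				 int xattr_len)
{
	/* Use the xattr signature only if its signing key is known to
	 * the IMA keyring; otherwise the caller falls back to the
	 * appended (module-style) signature, if one is present.
	 */
	return xattr_value &&
	       xattr_value->type == EVM_IMA_XATTR_DIGSIG &&
	       ima_xattr_sig_known_key(xattr_value, xattr_len);
}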

Re: [PATCH v6 12/12] ima: Write modsig to the measurement list

2018-03-26 Thread Mimi Zohar
On Fri, 2018-03-16 at 17:38 -0300, Thiago Jung Bauermann wrote:
> Define new "d-sig" template field which holds the digest that is expected
> to match the one contained in the modsig.
> 
> Also add modsig support to the "sig" template field, allowing the
> contents of the modsig to be included in the measurement list.

Although including the appended signature in the template data doesn't
make sense on its own, as the file digest (without the appended
signature) is needed to validate the appended signature, defining a
new template field and its usage should be independent of other
changes.

Mimi

> 
> Suggested-by: Mimi Zohar 
> Signed-off-by: Thiago Jung Bauermann 
> ---
>  Documentation/security/IMA-templates.rst  |  5 
>  security/integrity/ima/ima_template.c |  4 ++-
>  security/integrity/ima/ima_template_lib.c | 47 
> +--
>  security/integrity/ima/ima_template_lib.h |  2 ++
>  4 files changed, 55 insertions(+), 3 deletions(-)
> 
> diff --git a/Documentation/security/IMA-templates.rst 
> b/Documentation/security/IMA-templates.rst
> index 2cd0e273cc9a..f2a0f4225857 100644
> --- a/Documentation/security/IMA-templates.rst
> +++ b/Documentation/security/IMA-templates.rst
> @@ -68,6 +68,11 @@ descriptors by adding their identifier to the format string
>   - 'd-ng': the digest of the event, calculated with an arbitrary hash
> algorithm (field format: [:]digest, where the digest
> prefix is shown only if the hash algorithm is not SHA1 or MD5);
> + - 'd-sig': the digest of the event for files that have an appended modsig. 
> This
> +   field is calculated without including the modsig and thus will differ from
> +   the total digest of the file, but it is what should match the digest
> +   contained in the modsig (if it doesn't, the signature is invalid). It is
> +   shown in the same format as 'd-ng';
>   - 'n-ng': the name of the event, without size limitations;
>   - 'sig': the file signature.
> 
> diff --git a/security/integrity/ima/ima_template.c 
> b/security/integrity/ima/ima_template.c
> index 30db39b23804..36fc32f538b5 100644
> --- a/security/integrity/ima/ima_template.c
> +++ b/security/integrity/ima/ima_template.c
> @@ -43,8 +43,10 @@ static struct ima_template_field supported_fields[] = {
>.field_show = ima_show_template_string},
>   {.field_id = "sig", .field_init = ima_eventsig_init,
>.field_show = ima_show_template_sig},
> + {.field_id = "d-sig", .field_init = ima_eventdigest_sig_init,
> +  .field_show = ima_show_template_digest_ng},
>  };
> -#define MAX_TEMPLATE_NAME_LEN 15
> +#define MAX_TEMPLATE_NAME_LEN 24
> 
>  static struct ima_template_desc *ima_template;
>  static struct ima_template_desc *lookup_template_desc(const char *name);
> diff --git a/security/integrity/ima/ima_template_lib.c 
> b/security/integrity/ima/ima_template_lib.c
> index afb52a90e532..1dca082cce43 100644
> --- a/security/integrity/ima/ima_template_lib.c
> +++ b/security/integrity/ima/ima_template_lib.c
> @@ -220,7 +220,8 @@ int ima_parse_buf(void *bufstartp, void *bufendp, void 
> **bufcurp,
>   return 0;
>  }
> 
> -static int ima_eventdigest_init_common(u8 *digest, u32 digestsize, u8 
> hash_algo,
> +static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize,
> +u8 hash_algo,
>  struct ima_field_data *field_data)
>  {
>   /*
> @@ -323,6 +324,35 @@ int ima_eventdigest_ng_init(struct ima_event_data 
> *event_data,
>  hash_algo, field_data);
>  }
> 
> +/*
> + * This function writes the digest of the file which is expected to match the
> + * digest contained in the file's embedded signature.
> + */
> +int ima_eventdigest_sig_init(struct ima_event_data *event_data,
> +  struct ima_field_data *field_data)
> +{
> + struct evm_ima_xattr_data *xattr_value = event_data->xattr_value;
> + enum hash_algo hash_algo = HASH_ALGO_SHA1;
> + const u8 *cur_digest = NULL;
> + u8 cur_digestsize = 0;
> + int ret;
> +
> + if (!xattr_value || xattr_value->type != IMA_MODSIG)
> + return 0;
> +
> + if (event_data->violation)  /* recording a violation. */
> + goto out;
> +
> + ret = ima_get_modsig_hash(xattr_value, &hash_algo, &cur_digest,
> +   &cur_digestsize);
> + if (ret)
> + return ret;
> +
> + out:
> + return ima_eventdigest_init_common(cur_digest, cur_digestsize,
> +hash_algo, field_data);
> +}
> +
>  static int ima_eventname_init_common(struct ima_event_data *event_data,
>struct ima_field_data *field_data,
>bool size_limit)
> @@ -379,10 +409,23 @@ int ima_eventsig_init(struct ima_event_data *event_data,
> struct ima_field_data 

[PATCH] crypto: rsa - remove unneeded initializations

2018-03-26 Thread Tudor Ambarus
Remove useless assignment of ret to -ENOMEM in rsa_verify.
Remove useless initialization of ret to zero at declaration in
rsa_enc/dec/sign/verify.

Leave ret undefined at declaration so missed assignments are easier to
catch, and set ret in the relevant branches in rsa_enc/dec/sign.

Reported-by: Benjamin Bales 
Signed-off-by: Tudor Ambarus 
---
 crypto/rsa.c | 24 +---
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/crypto/rsa.c b/crypto/rsa.c
index b067f3a..e75ce09 100644
--- a/crypto/rsa.c
+++ b/crypto/rsa.c
@@ -88,7 +88,7 @@ static int rsa_enc(struct akcipher_request *req)
struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
const struct rsa_mpi_key *pkey = rsa_get_key(tfm);
MPI m, c = mpi_alloc(0);
-   int ret = 0;
+   int ret;
int sign;
 
if (!c)
@@ -99,10 +99,11 @@ static int rsa_enc(struct akcipher_request *req)
goto err_free_c;
}
 
-   ret = -ENOMEM;
m = mpi_read_raw_from_sgl(req->src, req->src_len);
-   if (!m)
+   if (!m) {
+   ret = -ENOMEM;
goto err_free_c;
+   }
 
ret = _rsa_enc(pkey, c, m);
if (ret)
@@ -127,7 +128,7 @@ static int rsa_dec(struct akcipher_request *req)
struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
const struct rsa_mpi_key *pkey = rsa_get_key(tfm);
MPI c, m = mpi_alloc(0);
-   int ret = 0;
+   int ret;
int sign;
 
if (!m)
@@ -138,10 +139,11 @@ static int rsa_dec(struct akcipher_request *req)
goto err_free_m;
}
 
-   ret = -ENOMEM;
c = mpi_read_raw_from_sgl(req->src, req->src_len);
-   if (!c)
+   if (!c) {
+   ret = -ENOMEM;
goto err_free_m;
+   }
 
ret = _rsa_dec(pkey, m, c);
if (ret)
@@ -165,7 +167,7 @@ static int rsa_sign(struct akcipher_request *req)
struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
const struct rsa_mpi_key *pkey = rsa_get_key(tfm);
MPI m, s = mpi_alloc(0);
-   int ret = 0;
+   int ret;
int sign;
 
if (!s)
@@ -176,10 +178,11 @@ static int rsa_sign(struct akcipher_request *req)
goto err_free_s;
}
 
-   ret = -ENOMEM;
m = mpi_read_raw_from_sgl(req->src, req->src_len);
-   if (!m)
+   if (!m) {
+   ret = -ENOMEM;
goto err_free_s;
+   }
 
ret = _rsa_sign(pkey, s, m);
if (ret)
@@ -204,7 +207,7 @@ static int rsa_verify(struct akcipher_request *req)
struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
const struct rsa_mpi_key *pkey = rsa_get_key(tfm);
MPI s, m = mpi_alloc(0);
-   int ret = 0;
+   int ret;
int sign;
 
if (!m)
@@ -215,7 +218,6 @@ static int rsa_verify(struct akcipher_request *req)
goto err_free_m;
}
 
-   ret = -ENOMEM;
s = mpi_read_raw_from_sgl(req->src, req->src_len);
if (!s) {
ret = -ENOMEM;
-- 
2.9.4



Re: [PATCH v13 01/10] iomap: Use correct endian conversion function in mmio_writeXXbe

2018-03-26 Thread Arnd Bergmann
On Wed, Mar 21, 2018 at 5:37 PM, Logan Gunthorpe  wrote:
> The semantics of the iowriteXXbe() functions are to write a
> value in CPU endianess to an IO register that is known by the
> caller to be in Big Endian. The mmio_writeXXbe() macro, which
> is called by iowriteXXbe(), should therefore use cpu_to_beXX()
> instead of beXX_to_cpu().
>
> Seeing that beXX_to_cpu() and cpu_to_beXX() are both functionally
> implemented as either null operations or swabXX operations, there
> was no noticeable bug here. But it is confusing for both developers
> and code analysis tools alike.
>
> Signed-off-by: Logan Gunthorpe 

Your patch is a clear improvement of what we had before, but I notice
that we have a weird asymmetry between big-endian and little-endian
accessors before and after this patch:

void iowrite32(u32 val, void __iomem *addr)
{
IO_COND(addr, outl(val,port), writel(val, addr));
}
void iowrite32be(u32 val, void __iomem *addr)
{
IO_COND(addr, pio_write32be(val,port), mmio_write32be(val, addr));
}

The little-endian iowrite32() when applied to mmio registers uses
a 32-bit wide atomic store to a little-endian register with barriers to
order against both spinlocks and DMA.

The big-endian iowrite32be() on the same pointer uses a nonatomic
store with no barriers whatsoever and the opposite endianness.

On most architectures, this is not important:
- For x86, the stores are always atomic and no additional barriers
  are needed, so the two are the same
- For ARM (both 32 and 64-bit), powerpc and many others, we don't
  use the generic iowrite() and just fall back to writel() or
  writel(swab32()).

However, shouldn't we just use the writel(swab32()) logic here as well
for the common case rather than risking missing barriers?

   Arnd
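
For reference, a minimal sketch of the writel(swab32()) approach being
discussed, with a hypothetical helper name; this is not the actual
lib/iomap.c code:

#include <linux/io.h>
#include <linux/swab.h>

/* Sketch: a big-endian 32-bit MMIO write built on writel(), so it
 * inherits writel()'s ordering/barrier semantics. writel() stores the
 * value as little-endian on the bus, so byte-swapping the CPU-endian
 * value first yields a big-endian store on both LE and BE CPUs.
 */
static inline void my_iowrite32be(u32 val, void __iomem *addr)
{
	writel(swab32(val), addr);
}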


Re: [PATCH v2 3/9] crypto: caam - don't leak pointers to authenc keys

2018-03-26 Thread Horia Geantă
On 3/23/2018 12:42 PM, Tudor Ambarus wrote:
> In caam's aead_setkey we save pointers to the authenc keys in a
> local variable of type struct crypto_authenc_keys and we don't
> zeroize it after use. Fix this and don't leak pointers to the
> authenc keys.
> 
> Signed-off-by: Tudor Ambarus 
Reviewed-by: Horia Geantă 

Thanks,
Horia
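
For context, the pattern under review is roughly the following: a hedged
sketch of an aead_setkey() that zeroizes the on-stack crypto_authenc_keys
after use, not the exact caam diff:

#include <crypto/aead.h>
#include <crypto/authenc.h>
#include <linux/string.h>

static int example_aead_setkey(struct crypto_aead *aead, const u8 *key,
			       unsigned int keylen)
{
	struct crypto_authenc_keys keys;
	int err;

	err = crypto_authenc_extractkeys(&keys, key, keylen);
	if (err)
		goto out;

	/* ... program keys.authkey / keys.enckey into the hardware ... */

out:
	/* keys holds pointers into the caller's key material; wipe it so
	 * the pointers do not linger on the stack after setkey returns.
	 */
	memzero_explicit(&keys, sizeof(keys));
	return err;
}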


Re: [PATCH v2 4/9] crypto: caam/qi - don't leak pointers to authenc keys

2018-03-26 Thread Horia Geantă
On 3/23/2018 12:42 PM, Tudor Ambarus wrote:
> In caam/qi's aead_setkey we save pointers to the authenc keys in
> a local variable of type struct crypto_authenc_keys and we don't
> zeroize it after use. Fix this and don't leak pointers to the
> authenc keys.
> 
> Signed-off-by: Tudor Ambarus 
Reviewed-by: Horia Geantă 

Thanks,
Horia


Re: [PATCH v6 0/5] add compression algorithm zBeWalgo

2018-03-26 Thread Minchan Kim
Hi Benjamin,

Thanks for the nice present and good testing!

I hope to grab a chance to test this shiny new algorithm but I am busy this week.
Hopefully, I will get to it soon and send you feedback asap.

Thanks.

On Mon, Mar 26, 2018 at 10:31:40AM +0200, Benjamin Warnke wrote:
> This patch series adds a new compression algorithm to the kernel and to
> the crypto api.
> 
> Changes since v5:
> - Fixed compile-error due to variable definitions inside #ifdef 
> CONFIG_ZRAM_WRITEBACK
> 
> Changes since v4:
> - Fix mismatching function-prototypes
> - Fix mismatching License errors
> - Add static to global vars
> - Add ULL to long constants
> 
> Changes since v3:
> - Split patch into patchset
> - Add Zstd = Zstandard to the list of benchmarked algorithms
> - Added configurable compression levels to crypto-api
> - Added multiple compression levels to the benchmarks below
> - Added unsafe decompressor functions to crypto-api
> - Added flag to mark unstable algorithms to crypto-api
> - Test the code using afl-fuzz -> and fix the code
> - Added 2 new Benchmark datasets
> - checkpatch.pl fixes
> 
> Changes since v2:
> - added linux-kernel Mailinglist
> 
> Changes since v1:
> - improved documentation
> - improved code style
> - replaced numerous casts with get_unaligned*
> - added tests in crypto/testmgr.h/c
> - added zBeWalgo to the list of algorithms shown by 
>  /sys/block/zram0/comp_algorithm
> 
> 
> Currently ZRAM uses compression-algorithms from the crypto-api. ZRAM
> compresses each page individually. As a result the compression algorithm is
> forced to use a very small sliding window. None of the available compression
> algorithms is designed to achieve high compression ratios with small inputs.
> 
> This patch-set adds a new compression algorithm 'zBeWalgo' to the crypto api.
> This algorithm focuses on increasing the capacity of the compressed
> block-device created by ZRAM. The choice of compression algorithms is always
> a tradeoff between speed and compression ratio.
> 
> If faster algorithms like 'lz4' are chosen the compression ratio is often
> lower than the ratio of zBeWalgo as shown in the following benchmarks. Due to
> the lower compression ratio, ZRAM needs to fall back to backing_devices
> mode often. If backing_devices are required, the effective speed of ZRAM is a
> weighted average of de/compression time and writing/reading from the
> backing_device. This should be considered when comparing the speeds in the
> benchmarks.
> 
> There are different kinds of backing_devices, each with its own drawbacks.
> 1. HDDs: This kind of backing device is very slow. If the compression ratio
> of an algorithm is much lower than the ratio of zBeWalgo, it might be faster
> to use zBeWalgo instead.
> 2. SSDs: I tested a swap partition on my NVME-SSD. The speed is even higher
> than zram with lz4, but after about 5 Minutes the SSD is blocking all
> read/write requests due to overheating. This is definitely not an option.
> 
> 
> Benchmarks:
> 
> 
> To obtain reproducible benchmarks, the datasets were first loaded into a
> userspace program. Then the data is written directly to a clean
> zram-partition without any filesystem. Between writing and reading, 'sync'
> and 'echo 3 > /proc/sys/vm/drop_caches' are called. All time measurements are
> wall clock times, and the benchmarks are using only one cpu-core at a time.
> The new algorithm is compared to all available compression algorithms from
> the crypto-api.
> 
> Before loading the datasets to user space, deduplication is applied, since
> none of the algorithms performs deduplication. Duplicated pages are removed to
> prevent an algorithm from obtaining high/low ratios just because a single page can
> be compressed very well - or not.
> 
> All Algorithms marked with '*' are using unsafe decompression.
> 
> All Read and Write Speed Measurements are given in MBit/s
> 
> 'zbewalgo' uses different combinations, specialized per dataset. These can be
> specified at runtime via /sys/kernel/zbewalgo/combinations.
> 
> 
> - '/dev/zero' This dataset is used to measure the speed limitations
> for ZRAM. ZRAM filters zero-data internally and does not even call the
> specified compression algorithm.
> 
> Algorithm      write     read
> --zram--  2724.08 2828.87
> 
> 
> - 'ecoham' This dataset is one of the input files for the scientific
> application ECOHAM which runs an ocean simulation. This dataset contains a
> lot of zeros - even after deduplication. Where the data is not zero there are
> arrays of floating point values, adjacent float values are likely to be
> similar to each other, allowing for high compression ratios.
> 
> zbewalgo reaches very high compression ratios and is a lot faster than other
> algorithms with similar compression ratios.
> 
> Algorithm    ratio    write     read
> --hdd--   1.00   134.70   156.62
> lz4*_10   6.73  1303.12  1547.17
> lz4_106.73  1303.12  1574.51
> lzo   6.88  1205.98  1468.09
> lz4*_05   7.00  1291.81  1642.41
> lz4_05

[PATCH v6 4/5] crypto: configurable compression level

2018-03-26 Thread Benjamin Warnke
Most compression algorithms published by the crypto api support multiple
compression levels. The crypto api currently just calls these algorithms
with their default compression level.

This patch enables the caller to specify the compression level.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/api.c  | 76 +++
 crypto/deflate.c  | 16 +
 crypto/lz4.c  | 16 +
 crypto/lz4hc.c| 13 +---
 crypto/testmgr.c  |  2 +-
 drivers/block/zram/zcomp.c| 10 +++---
 drivers/block/zram/zcomp.h|  3 +-
 drivers/block/zram/zram_drv.c | 24 --
 drivers/block/zram/zram_drv.h |  1 +
 fs/ubifs/compress.c   |  2 +-
 include/linux/crypto.h|  9 +++--
 mm/zswap.c|  2 +-
 net/xfrm/xfrm_ipcomp.c|  3 +-
 13 files changed, 146 insertions(+), 31 deletions(-)

diff --git a/crypto/api.c b/crypto/api.c
index 70a894e52..dadd4dede 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -384,6 +384,47 @@ struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg 
*alg, u32 type,
 }
 EXPORT_SYMBOL_GPL(__crypto_alloc_tfm);
 
+struct crypto_tfm *__crypto_alloc_tfm_compress(struct crypto_alg *alg,
+  u32 type, u32 mask, int level)
+{
+   struct crypto_tfm *tfm = NULL;
+   unsigned int tfm_size;
+   int err = -ENOMEM;
+
+   tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask);
+   tfm = kzalloc(tfm_size, GFP_KERNEL);
+   if (!tfm)
+   goto out_err;
+
+   tfm->__crt_alg = alg;
+   if (alg->cra_flags & CRYPTO_ALG_TYPE_COMPRESS)
+   tfm->crt_compress.cot_level = level;
+
+   err = crypto_init_ops(tfm, type, mask);
+   if (err)
+   goto out_free_tfm;
+
+   if (!tfm->exit && alg->cra_init) {
+   err = alg->cra_init(tfm);
+   if (err)
+   goto cra_init_failed;
+   }
+
+   goto out;
+
+cra_init_failed:
+   crypto_exit_ops(tfm);
+out_free_tfm:
+   if (err == -EAGAIN)
+   crypto_shoot_alg(alg);
+   kfree(tfm);
+out_err:
+   tfm = ERR_PTR(err);
+out:
+   return tfm;
+}
+EXPORT_SYMBOL_GPL(__crypto_alloc_tfm_compress);
+
 /*
  * crypto_alloc_base - Locate algorithm and allocate transform
  * @alg_name: Name of algorithm
@@ -440,6 +481,41 @@ struct crypto_tfm *crypto_alloc_base(const char *alg_name, 
u32 type, u32 mask)
 }
 EXPORT_SYMBOL_GPL(crypto_alloc_base);
 
+struct crypto_tfm *crypto_alloc_base_compress(const char *alg_name, u32 type,
+ u32 mask, int level)
+{
+   struct crypto_tfm *tfm;
+   int err;
+
+   for (;;) {
+   struct crypto_alg *alg;
+
+   alg = crypto_alg_mod_lookup(alg_name, type, mask);
+   if (IS_ERR(alg)) {
+   err = PTR_ERR(alg);
+   goto err;
+   }
+
+   tfm = __crypto_alloc_tfm_compress(alg, type, mask, level);
+   if (!IS_ERR(tfm))
+   return tfm;
+
+   crypto_mod_put(alg);
+   err = PTR_ERR(tfm);
+
+err:
+   if (err != -EAGAIN)
+   break;
+   if (fatal_signal_pending(current)) {
+   err = -EINTR;
+   break;
+   }
+   }
+
+   return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(crypto_alloc_base_compress);
+
 void *crypto_create_tfm(struct crypto_alg *alg,
const struct crypto_type *frontend)
 {
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 4b681a37c..54a2ff21b 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -24,6 +24,7 @@
  * it is not needed for IPCOMP and keeps the code simpler.  It can be
  * implemented if someone wants it.
  */
+
 #include 
 #include 
 #include 
@@ -43,7 +44,7 @@ struct deflate_ctx {
struct z_stream_s decomp_stream;
 };
 
-static int deflate_comp_init(struct deflate_ctx *ctx, int format)
+static int deflate_comp_init(struct deflate_ctx *ctx, int format, int level)
 {
int ret = 0;
	struct z_stream_s *stream = &ctx->comp_stream;
@@ -55,9 +56,9 @@ static int deflate_comp_init(struct deflate_ctx *ctx, int 
format)
goto out;
}
if (format)
-   ret = zlib_deflateInit(stream, 3);
+   ret = zlib_deflateInit(stream, level);
else
-   ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED,
+   ret = zlib_deflateInit2(stream, level, Z_DEFLATED,
-DEFLATE_DEF_WINBITS,
DEFLATE_DEF_MEMLEVEL,
Z_DEFAULT_STRATEGY);
@@ -109,11 +110,11 @@ static void deflate_decomp_exit(struct deflate_ctx *ctx)
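
A hedged usage sketch of the crypto_alloc_base_compress() helper added by
this patch; the call site, algorithm name, and level value are illustrative
and may not match how the zram/zcomp hunks in this series actually use it:

#include <linux/crypto.h>
#include <linux/err.h>

/* Allocate a deflate transform at a caller-chosen compression level,
 * using the level-aware allocator introduced above.
 */
static struct crypto_tfm *alloc_deflate_tfm(int level)
{
	return crypto_alloc_base_compress("deflate",
					  CRYPTO_ALG_TYPE_COMPRESS,
					  CRYPTO_ALG_TYPE_MASK, level);
}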

[PATCH v6 2/5] crypto: add zBeWalgo to crypto-api

2018-03-26 Thread Benjamin Warnke
This patch adds zBeWalgo to the crypto api so that zBeWalgo can be used by
zram.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/Kconfig|  12 
 crypto/Makefile   |   1 +
 crypto/testmgr.c  |  10 +++
 crypto/testmgr.h  | 134 ++
 crypto/zbewalgo.c | 164 ++
 drivers/block/zram/zcomp.c|   3 +
 drivers/block/zram/zram_drv.h |   4 +-
 7 files changed, 327 insertions(+), 1 deletion(-)
 create mode 100644 crypto/zbewalgo.c

diff --git a/crypto/Kconfig b/crypto/Kconfig
index b75264b09..3ac0d4ca7 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1668,6 +1668,18 @@ config CRYPTO_LZ4
help
  This is the LZ4 algorithm.
 
+config CRYPTO_ZBEWALGO
+   tristate "zBeWalgo compression algorithm"
+   select CRYPTO_ALGAPI
+   select CRYPTO_ACOMP2
+   select ZBEWALGO_COMPRESS
+   help
+ This is the zBeWalgo compression algorithm. This algorithm
+ accepts only input sizes of at most one page at once.
+ To achieve high compression ratios zbewalgo can call multiple
+ transformation and compression algorithms in a row to optimize
+ the compressed size.
+
 config CRYPTO_LZ4HC
tristate "LZ4HC compression algorithm"
select CRYPTO_ALGAPI
diff --git a/crypto/Makefile b/crypto/Makefile
index cdbc03b35..2a42fb289 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -121,6 +121,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o 
crct10dif_generic.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
+obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o
 obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
 obj-$(CONFIG_CRYPTO_842) += 842.o
 obj-$(CONFIG_CRYPTO_RNG2) += rng.o
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index d5e23a142..294075476 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -3566,6 +3566,16 @@ static const struct alg_test_desc alg_test_descs[] = {
.dec = __VECS(tf_xts_dec_tv_template)
}
}
+   }, {
+   .alg = "zbewalgo",
+   .test = alg_test_comp,
+   .fips_allowed = 1,
+   .suite = {
+   .comp = {
+   .comp = __VECS(zbewalgo_comp_tv_template),
+   .decomp = __VECS(zbewalgo_decomp_tv_template)
+   }
+   }
}, {
.alg = "zlib-deflate",
.test = alg_test_comp,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 6044f6906..996d8321e 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -35133,6 +35133,140 @@ static const struct hash_testvec 
bfin_crc_tv_template[] = {
 
 };
 
+static const struct comp_testvec zbewalgo_comp_tv_template[] = {
+   {
+   .inlen  = 512,
+   .outlen = 402,
+   .input  =
+   
"\x8a\x3a\xf3\xbe\x33\xf9\xab\x3d\xa1\x51\x9f\x7f\xad\xf6\xab\x3d"
+   
"\xad\x29\x8f\x3c\x27\xf4\xab\x3d\x06\x19\xc3\xf5\xa0\xf1\xab\x3d"
+   
"\xfb\x75\x3b\xab\x1a\xef\xab\x3d\xe3\x96\xf8\x5c\x94\xec\xab\x3d"
+   
"\x13\xd2\xfa\x0a\x0e\xea\xab\x3d\xe0\x7d\x42\xb5\x87\xe7\xab\x3d"
+   
"\xa1\xf0\xcf\x5b\x01\xe5\xab\x3d\xad\x80\xa3\xfe\x7a\xe2\xab\x3d"
+   
"\x59\x84\xbd\x9d\xf4\xdf\xab\x3d\xff\x51\x1e\x39\x6e\xdd\xab\x3d"
+   
"\xf5\x3f\xc6\xd0\xe7\xda\xab\x3d\x96\xa4\xb5\x64\x61\xd8\xab\x3d"
+   
"\x3b\xd6\xec\xf4\xda\xd5\xab\x3d\x3b\x2b\x6c\x81\x54\xd3\xab\x3d"
+   
"\xf2\xf9\x33\x0a\xce\xd0\xab\x3d\xbb\x98\x44\x8f\x47\xce\xab\x3d"
+   
"\xed\x5d\x9e\x10\xc1\xcb\xab\x3d\xe7\x9f\x41\x8e\x3a\xc9\xab\x3d"
+   
"\x07\xb5\x2e\x08\xb4\xc6\xab\x3d\xa9\xf3\x65\x7e\x2d\xc4\xab\x3d"
+   
"\x28\xb2\xe7\xf0\xa6\xc1\xab\x3d\xe3\x46\xb4\x5f\x20\xbf\xab\x3d"
+   
"\x38\x08\xcc\xca\x99\xbc\xab\x3d\x85\x4c\x2f\x32\x13\xba\xab\x3d"
+   
"\x2a\x6a\xde\x95\x8c\xb7\xab\x3d\x85\xb7\xd9\xf5\x05\xb5\xab\x3d"
+   
"\xf7\x8a\x21\x52\x7f\xb2\xab\x3d\xe2\x3a\xb6\xaa\xf8\xaf\xab\x3d"
+   
"\xa5\x1d\x98\xff\x71\xad\xab\x3d\xa3\x89\xc7\x50\xeb\xaa\xab\x3d"
+   
"\x3d\xd5\x44\x9e\x64\xa8\xab\x3d\xd6\x56\x10\xe8\xdd\xa5\xab\x3d"
+   
"\xce\x64\x2a\x2e\x57\xa3\xab\x3d\x8d\x55\x93\x70\xd0\xa0\xab\x3d"
+   
"\x76\x7f\x4b\xaf\x49\x9e\xab\x3d\xeb\x38\x53\xea\xc2\x9b\xab\x3d"
+   
"\x53\xd8\xaa\x21\x3c\x99\xab\x3d\x13\xb4\x52\x55\xb5\x96\xab\x3d"
+   

[PATCH v6 3/5] crypto: add unsafe decompression to api

2018-03-26 Thread Benjamin Warnke
Up to version 3 of this patch the zbewalgo decompressor did not verify
that there is no overflow in the output buffer. Now zbewalgo includes a
safe decompressor which does check for buffer overflows and heap errors.
zBeWalgo and other algorithms like lz4 include an unsafe decompressor
version, which is a bit faster but does no error checking. These unsafe
decompressors can be used when the data source and the whole data path
are trusted.

This patch publishes these existing functions in the crypto-api.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/842.c |  3 ++-
 crypto/compress.c| 10 ++
 crypto/crypto_null.c |  3 ++-
 crypto/deflate.c |  3 ++-
 crypto/lz4.c | 23 ++-
 crypto/lz4hc.c   | 23 ++-
 crypto/lzo.c |  3 ++-
 crypto/testmgr.c | 27 ++-
 crypto/zbewalgo.c| 29 -
 drivers/block/zram/zram_drv.c| 34 +-
 drivers/block/zram/zram_drv.h|  1 +
 drivers/crypto/cavium/zip/zip_main.c |  6 --
 drivers/crypto/nx/nx-842-powernv.c   |  3 ++-
 drivers/crypto/nx/nx-842-pseries.c   |  3 ++-
 include/linux/crypto.h   | 16 
 15 files changed, 174 insertions(+), 13 deletions(-)

diff --git a/crypto/842.c b/crypto/842.c
index bc26dc942..7e74ea26b 100644
--- a/crypto/842.c
+++ b/crypto/842.c
@@ -112,7 +112,8 @@ static struct crypto_alg alg = {
.cra_exit   = crypto842_exit,
.cra_u  = { .compress = {
.coa_compress   = crypto842_compress,
-   .coa_decompress = crypto842_decompress } }
+   .coa_decompress = crypto842_decompress,
+   .coa_decompress_unsafe  = crypto842_decompress } }
 };
 
 static struct scomp_alg scomp = {
diff --git a/crypto/compress.c b/crypto/compress.c
index f2d522924..bec796249 100644
--- a/crypto/compress.c
+++ b/crypto/compress.c
@@ -33,12 +33,22 @@ static int crypto_decompress(struct crypto_tfm *tfm,
   dlen);
 }
 
+static int crypto_decompress_unsafe(struct crypto_tfm *tfm,
+   const u8 *src, unsigned int slen,
+u8 *dst, unsigned int *dlen)
+{
+   return tfm->__crt_alg->cra_compress.coa_decompress_unsafe(tfm, src,
+ slen, dst,
+ dlen);
+}
+
 int crypto_init_compress_ops(struct crypto_tfm *tfm)
 {
	struct compress_tfm *ops = &tfm->crt_compress;
 
ops->cot_compress = crypto_compress;
ops->cot_decompress = crypto_decompress;
+   ops->cot_decompress_unsafe = crypto_decompress_unsafe;
 
return 0;
 }
diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c
index 20ff2c746..6e15e8c0b 100644
--- a/crypto/crypto_null.c
+++ b/crypto/crypto_null.c
@@ -146,7 +146,8 @@ static struct crypto_alg null_algs[3] = { {
.cra_module =   THIS_MODULE,
.cra_u  =   { .compress = {
.coa_compress   =   null_compress,
-   .coa_decompress =   null_compress } }
+   .coa_decompress =   null_compress,
+   .coa_decompress_unsafe  =   null_compress } }
 } };
 
 MODULE_ALIAS_CRYPTO("compress_null");
diff --git a/crypto/deflate.c b/crypto/deflate.c
index 94ec3b36a..4b681a37c 100644
--- a/crypto/deflate.c
+++ b/crypto/deflate.c
@@ -286,7 +286,8 @@ static struct crypto_alg alg = {
.cra_exit   = deflate_exit,
.cra_u  = { .compress = {
.coa_compress   = deflate_compress,
-   .coa_decompress = deflate_decompress } }
+   .coa_decompress = deflate_decompress,
+   .coa_decompress_unsafe  = deflate_decompress } }
 };
 
 static struct scomp_alg scomp[] = { {
diff --git a/crypto/lz4.c b/crypto/lz4.c
index 2ce2660d3..60a1914b7 100644
--- a/crypto/lz4.c
+++ b/crypto/lz4.c
@@ -103,6 +103,19 @@ static int __lz4_decompress_crypto(const u8 *src, unsigned 
int slen,
return 0;
 }
 
+static int __lz4_decompress_crypto_unsafe(const u8 *src, unsigned int slen,
+ u8 *dst, unsigned int *dlen,
+  void *ctx)
+{
+   int out_len = LZ4_decompress_fast(src, dst, *dlen);
+
+   if (out_len < 0)
+   return -EINVAL;
+
+   *dlen = out_len;
+   return 0;
+}
+
 static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src,
   unsigned int slen, u8 *dst, unsigned int *dlen,
   void *ctx)
@@ -117,6 +130,13 @@ static int lz4_decompress_crypto(struct 

[PATCH v6 0/5] add compression algorithm zBeWalgo

2018-03-26 Thread Benjamin Warnke
This patch series adds a new compression algorithm to the kernel and to
the crypto api.

Changes since v5:
- Fixed compile-error due to variable definitions inside #ifdef 
CONFIG_ZRAM_WRITEBACK

Changes since v4:
- Fix mismatching function-prototypes
- Fix mismatching License errors
- Add static to global vars
- Add ULL to long constants

Changes since v3:
- Split patch into patchset
- Add Zstd = Zstandard to the list of benchmarked algorithms
- Added configurable compression levels to crypto-api
- Added multiple compression levels to the benchmarks below
- Added unsafe decompressor functions to crypto-api
- Added flag to mark unstable algorithms to crypto-api
- Test the code using afl-fuzz -> and fix the code
- Added 2 new Benchmark datasets
- checkpatch.pl fixes

Changes since v2:
- added linux-kernel Mailinglist

Changes since v1:
- improved documentation
- improved code style
- replaced numerous casts with get_unaligned*
- added tests in crypto/testmgr.h/c
- added zBeWalgo to the list of algorithms shown by 
 /sys/block/zram0/comp_algorithm


Currently ZRAM uses compression-algorithms from the crypto-api. ZRAM
compresses each page individually. As a result the compression algorithm is
forced to use a very small sliding window. None of the available compression
algorithms is designed to achieve high compression ratios with small inputs.

This patch-set adds a new compression algorithm 'zBeWalgo' to the crypto api.
This algorithm focuses on increasing the capacity of the compressed
block-device created by ZRAM. The choice of compression algorithms is always
a tradeoff between speed and compression ratio.

If faster algorithms like 'lz4' are chosen the compression ratio is often
lower than the ratio of zBeWalgo as shown in the following benchmarks. Due to
the lower compression ratio, ZRAM needs to fall back to backing_devices
mode often. If backing_devices are required, the effective speed of ZRAM is a
weighted average of de/compression time and writing/reading from the
backing_device. This should be considered when comparing the speeds in the
benchmarks.

There are different kinds of backing_devices, each with its own drawbacks.
1. HDDs: This kind of backing device is very slow. If the compression ratio
of an algorithm is much lower than the ratio of zBeWalgo, it might be faster
to use zBeWalgo instead.
2. SSDs: I tested a swap partition on my NVME-SSD. The speed is even higher
than zram with lz4, but after about 5 Minutes the SSD is blocking all
read/write requests due to overheating. This is definitely not an option.


Benchmarks:


To obtain reproducible benchmarks, the datasets were first loaded into a
userspace program. Then the data is written directly to a clean
zram-partition without any filesystem. Between writing and reading, 'sync'
and 'echo 3 > /proc/sys/vm/drop_caches' are called. All time measurements are
wall clock times, and the benchmarks are using only one cpu-core at a time.
The new algorithm is compared to all available compression algorithms from
the crypto-api.

Before loading the datasets to user space, deduplication is applied, since
none of the algorithms performs deduplication. Duplicated pages are removed to
prevent an algorithm from obtaining high/low ratios just because a single page can
be compressed very well - or not.

All Algorithms marked with '*' are using unsafe decompression.

All Read and Write Speed Measurements are given in MBit/s

'zbewalgo' uses different combinations, specialized per dataset. These can be
specified at runtime via /sys/kernel/zbewalgo/combinations.


- '/dev/zero' This dataset is used to measure the speed limitations
for ZRAM. ZRAM filters zero-data internally and does not even call the
specified compression algorithm.

Algorithm      write     read
--zram--  2724.08 2828.87


- 'ecoham' This dataset is one of the input files for the scientific
application ECOHAM which runs an ocean simulation. This dataset contains a
lot of zeros - even after deduplication. Where the data is not zero there are
arrays of floating point values, adjacent float values are likely to be
similar to each other, allowing for high compression ratios.

zbewalgo reaches very high compression ratios and is a lot faster than other
algorithms with similar compression ratios.

Algorithm    ratio    write     read
--hdd--   1.00   134.70   156.62
lz4*_10   6.73  1303.12  1547.17
lz4_106.73  1303.12  1574.51
lzo   6.88  1205.98  1468.09
lz4*_05   7.00  1291.81  1642.41
lz4_057.00  1291.81  1682.81
lz4_077.13  1250.29  1593.89
lz4*_07   7.13  1250.29  1677.08
lz4_067.16  1307.62  1666.66
lz4*_06   7.16  1307.62  1669.42
lz4_037.21  1250.87  1449.48
lz4*_03   7.21  1250.87  1621.97
lz4*_04   7.23  1281.62  1645.56
lz4_047.23  1281.62  1666.81
lz4_027.33  1267.54  1523.11
lz4*_02   7.33  1267.54  1576.54
lz4_097.36  1140.55  1510.01
lz4*_09   7.36  1140.55  1692.38
lz4*_01   7.36  1215.40  

[PATCH v6 1/5] add compression algorithm zBeWalgo

2018-03-26 Thread Benjamin Warnke
zBeWalgo is a completely new algorithm - currently it is not published
anywhere else, so googling it will not turn up any results. The
following section describes how the algorithm works.

zBeWalgo itself is a container compression algorithm, which can execute
multiple different compression and transformation algorithms after each other.
The execution of different compression algorithms after each other will be
called 'combination' in this description and in the code. Additionally to be
able to execute combinations of algorithms, zBeWalgo can try different
combinations on the same input. This allows high compression ratios on
completely different datasets, which would otherwise require its own
algorithm each. Executing all known combinations on each input page would be
very slow. Therefore the data is compressed at first with that combination,
which was already successful on the last input page. If the compressed data
size of the current page is similar to that of the last page, the compressed
data is returned immediately without even trying the other combinations. Even
if there is no guarantee that consecutive calls to the algorithm belong to
each other, the speed improvement is obvious.

ZRAM uses zsmalloc for the management of the compressed pages. The largest
size-class in zsmalloc is 3264 Bytes. If the compressed data is larger than
that threshold, ZRAM ignores the compression and writes the uncompressed page
instead. As a consequence it is useless to continue compression, if the
algorithm detects, that the data can not be compressed using the current
combination. The threshold for aborting compression can be changed via sysfs at
any time, even if the algorithm is currently in use. If a combination fails to
compress the data, zBeWalgo tries the next combination. If no combination is
able to reduce the data in size, zBeWalgo returns a negative value.

Each combination consists of up to 7 compression and transformation steps.
Combinations can be added and removed at any time via sysfs. Already compressed
Data can always be decompressed, even if the combination used to produce it
does not exist anymore. Technically the user could add up to 256 combinations
concurrently, but that would be very time consuming if the data can not be
compressed.

To be able to build combinations and call different algorithms, all those
algorithms are implementing the same interface. This enables the user to
specify additional combinations while ZRAM is running.

Within the combinations many different algorithms can be used. Some of those
algorithms are published. This patch adds the following algorithms to be used
within the combinations:
- bwt: The Burrows-Wheeler-Transformation was published by 'M. Burrows' and
'D. J. Wheeler' in 1994. This implementation uses counting sort for
sorting the data. Their original paper is online available at:
http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf
- mtf: The Move-To-Front algorithm as described by 'M. Burrows' and
'D. J. Wheeler' in the same paper as bwt.
- jbe: j-bit-encoding as proposed by 'I Made Agus Dwi Suarjaya' in 2012.
https://arxiv.org/pdf/1209.1045.pdf
- jbe2: A minor modification of jbe. Swapping groups of 4 Bits in consecutive
Bytes can increase the compression ratio, if for example the first
4 Bits of each Byte are zero. If jbe2 is called after mtf, this
happens often.
- rle: Run Length Encoding
- huffman: Huffman encoding
- bewalgo: I invented this algorithm for my bachelor's thesis
'Page-Based compression in the Linux Kernel'. This algorithm is
mainly inspired by lz4, focusing on increasing the speed even more,
with the help of page-aligned read and write access. To achieve the
page alignment, the input and output data is accessed only in
blocks of 8 Bytes, therefore the encoding of the compressed data is
changed.

https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf
- bewalgo2: At the beginning of my work to improve ZRAM this was the whole
algorithm. The input is read in blocks of 8 Bytes. These Blocks
are added to an avl-tree. The avl-tree is mapped directly to an
array. The encoding is a variation of Run Length Encoding using the
indices in the avl-tree as data. The reason for using the tree
with indices is that the indices can be encoded in less than
8 Bytes each.
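
For illustration, a plain textbook move-to-front transform of the kind the
'mtf' step above describes; this is a userspace sketch, not the kernel
implementation from lib/zbewalgo/MTF.c:

#include <stdint.h>
#include <string.h>

/* Encode 'len' bytes with move-to-front: each output byte is the rank
 * of the input symbol in a recency-ordered table, and the symbol is
 * then moved to the front. Runs of similar symbols (e.g. after bwt)
 * become runs of small values, which later stages compress well.
 */
static void mtf_encode(const uint8_t *in, uint8_t *out, size_t len)
{
	uint8_t table[256];
	size_t i;
	unsigned int j;

	for (j = 0; j < 256; j++)
		table[j] = (uint8_t)j;

	for (i = 0; i < len; i++) {
		uint8_t c = in[i];
		unsigned int rank = 0;

		while (table[rank] != c)
			rank++;
		out[i] = (uint8_t)rank;

		/* move the symbol to the front of the table */
		memmove(&table[1], &table[0], rank);
		table[0] = c;
	}
}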

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 include/linux/zbewalgo.h  |  50 
 lib/Kconfig   |   3 +
 lib/Makefile  |   1 +
 lib/zbewalgo/BWT.c| 120 
 lib/zbewalgo/BWT.h|  21 ++
 lib/zbewalgo/JBE.c| 204 +
 lib/zbewalgo/JBE.h|  13 +
 lib/zbewalgo/JBE2.c   | 221 ++
 lib/zbewalgo/JBE2.h   |  13 +
 lib/zbewalgo/MTF.c| 122 
 lib/zbewalgo/MTF.h|  13 +
 lib/zbewalgo/Makefile |   4 +
 

[PATCH v6 5/5] crypto: add flag for unstable encoding

2018-03-26 Thread Benjamin Warnke
The data format of zBeWalgo and some other algorithms is unstable. To
identify such unstable algorithms, this patch adds a new flag to the
crypto-api.

Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de>
---
 crypto/zbewalgo.c  | 2 +-
 include/linux/crypto.h | 6 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c
index 9db0d43be..e57b5ced5 100644
--- a/crypto/zbewalgo.c
+++ b/crypto/zbewalgo.c
@@ -134,7 +134,7 @@ static int zbewalgo_decompress_crypto_unsafe(struct 
crypto_tfm *tfm,
 
 static struct crypto_alg crypto_alg_zbewalgo = {
.cra_name = "zbewalgo",
-   .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
+   .cra_flags = CRYPTO_ALG_TYPE_COMPRESS | CRYPTO_ALG_UNSTABLE_ENCODING,
.cra_ctxsize = sizeof(struct zbewalgo_ctx),
.cra_module = THIS_MODULE,
.cra_init = zbewalgo_init,
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 63420dac0..372893569 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -112,6 +112,12 @@
  */
 #define CRYPTO_ALG_OPTIONAL_KEY0x4000
 
+/*
+ * Set if the algorithm is new and it is likely that the encoding may
+ * change in near future
+ */
+#define CRYPTO_ALG_UNSTABLE_ENCODING   0x8000
+
 /*
  * Transform masks and values (for crt_flags).
  */
-- 
2.14.1



[PATCH 0/2] crypto: ccree: cleanup and hardware keys

2018-03-26 Thread Gilad Ben-Yossef
Small cleanup and add support for CryptoCell hardware keys.

Gilad Ben-Yossef (2):
  crypto: ccree: remove unused enums
  crypto: ccree: enable support for hardware keys

 crypto/testmgr.c|  43 
 drivers/crypto/ccree/cc_cipher.c| 348 
 drivers/crypto/ccree/cc_cipher.h|  30 +--
 drivers/crypto/ccree/cc_hw_queue_defs.h |  28 +--
 4 files changed, 366 insertions(+), 83 deletions(-)

-- 
2.7.4



[PATCH 1/2] crypto: ccree: remove unused enums

2018-03-26 Thread Gilad Ben-Yossef
Remove enum definitions unused in the driver code.

Signed-off-by: Gilad Ben-Yossef 
---
 drivers/crypto/ccree/cc_hw_queue_defs.h | 28 +++-
 1 file changed, 7 insertions(+), 21 deletions(-)

diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h 
b/drivers/crypto/ccree/cc_hw_queue_defs.h
index bf0d235..a091ae5 100644
--- a/drivers/crypto/ccree/cc_hw_queue_defs.h
+++ b/drivers/crypto/ccree/cc_hw_queue_defs.h
@@ -124,13 +124,6 @@ enum cc_flow_mode {
FLOW_MODE_END = S32_MAX,
 };
 
-enum cc_tunnel_op {
-   TUNNEL_OP_INVALID = -1,
-   TUNNEL_OFF = 0,
-   TUNNEL_ON = 1,
-   TUNNEL_OP_END = S32_MAX,
-};
-
 enum cc_setup_op {
SETUP_LOAD_NOP  = 0,
SETUP_LOAD_STATE0   = 1,
@@ -145,6 +138,13 @@ enum cc_setup_op {
SETUP_OP_END = S32_MAX,
 };
 
+enum cc_hash_conf_pad {
+   HASH_PADDING_DISABLED = 0,
+   HASH_PADDING_ENABLED = 1,
+   HASH_DIGEST_RESULT_LITTLE_ENDIAN = 2,
+   HASH_CONFIG1_PADDING_RESERVE32 = S32_MAX,
+};
+
 enum cc_aes_mac_selector {
AES_SK = 1,
AES_CMAC_INIT = 2,
@@ -179,20 +179,6 @@ enum cc_hw_aes_key_size {
END_OF_AES_KEYS = S32_MAX,
 };
 
-enum cc_hw_des_key_size {
-   DES_ONE_KEY = 0,
-   DES_TWO_KEYS = 1,
-   DES_THREE_KEYS = 2,
-   END_OF_DES_KEYS = S32_MAX,
-};
-
-enum cc_hash_conf_pad {
-   HASH_PADDING_DISABLED = 0,
-   HASH_PADDING_ENABLED = 1,
-   HASH_DIGEST_RESULT_LITTLE_ENDIAN = 2,
-   HASH_CONFIG1_PADDING_RESERVE32 = S32_MAX,
-};
-
 enum cc_hash_cipher_pad {
DO_NOT_PAD = 0,
DO_PAD = 1,
-- 
2.7.4



[PATCH 2/2] crypto: ccree: enable support for hardware keys

2018-03-26 Thread Gilad Ben-Yossef
Enable CryptoCell support for hardware keys.

Hardware keys are regular AES keys loaded into CryptoCell internal memory
via firmware, often from secure boot ROM or hardware fuses at boot time.

As such, they can be used for enc/dec purposes like any other key, but
cannot (read: it is extremely hard to) be extracted since they are not
available anywhere in RAM during runtime.

The mechanism has some similarities to s390 secure keys although the keys
are not wrapped or sealed, but simply loaded offline. The interface was
therefore modeled based on the s390 secure keys support.

Signed-off-by: Gilad Ben-Yossef 
---
 crypto/testmgr.c |  43 +
 drivers/crypto/ccree/cc_cipher.c | 348 ++-
 drivers/crypto/ccree/cc_cipher.h |  30 +---
 3 files changed, 359 insertions(+), 62 deletions(-)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index af4a01c..8a5a60c 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -2558,6 +2558,13 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}, {
+   /* Same as cbc(aes) except the key is stored in
+* hardware secure memory which we reference by index
+*/
+   .alg = "cbc(haes)",
+   .test = alg_test_null,
+   .fips_allowed = 1,
+   }, {
.alg = "cbc(serpent)",
.test = alg_test_skcipher,
.suite = {
@@ -2704,6 +2711,13 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}, {
+   /* Same as ctr(aes) except the key is stored in
+* hardware secure memory which we reference by index
+*/
+   .alg = "ctr(haes)",
+   .test = alg_test_null,
+   .fips_allowed = 1,
+   }, {
.alg = "ctr(serpent)",
.test = alg_test_skcipher,
.suite = {
@@ -2974,6 +2988,13 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}, {
+   /* Same as ecb(aes) except the key is stored in
+* hardware secure memory which we reference by index
+*/
+   .alg = "ecb(haes)",
+   .test = alg_test_null,
+   .fips_allowed = 1,
+   }, {
.alg = "ecb(khazad)",
.test = alg_test_skcipher,
.suite = {
@@ -3301,6 +3322,13 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}, {
+   /* Same as ofb(aes) except the key is stored in
+* hardware secure memory which we reference by index
+*/
+   .alg = "ofb(haes)",
+   .test = alg_test_null,
+   .fips_allowed = 1,
+   }, {
.alg = "pcbc(fcrypt)",
.test = alg_test_skcipher,
.suite = {
@@ -3558,6 +3586,21 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}
}, {
+   /* Same as xts(aes) except the key is stored in
+* hardware secure memory which we reference by index
+*/
+   .alg = "xts(haes)",
+   .test = alg_test_null,
+   .fips_allowed = 1,
+   }, {
+   .alg = "xts4096(haes)",
+   .test = alg_test_null,
+   .fips_allowed = 1,
+   }, {
+   .alg = "xts512(haes)",
+   .test = alg_test_null,
+   .fips_allowed = 1,
+   }, {
.alg = "xts(camellia)",
.test = alg_test_skcipher,
.suite = {
diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index df98f7a..8ccb7c4 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -42,6 +42,7 @@ struct cc_cipher_ctx {
int cipher_mode;
int flow_mode;
unsigned int flags;
+   bool hw_key;
struct cc_user_key_info user;
struct cc_hw_key_info hw;
struct crypto_shash *shash_tfm;
@@ -49,6 +50,13 @@ struct cc_cipher_ctx {
 
 static void cc_cipher_complete(struct device *dev, void *cc_req, int err);
 
+static inline bool cc_is_hw_key(struct crypto_tfm *tfm)
+{
+   struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm);
+
+   return ctx_p->hw_key;
+}
+
 static int validate_keys_sizes(struct cc_cipher_ctx *ctx_p, u32 size)
 {
switch (ctx_p->flow_mode) {
@@ -211,7 +219,7 @@ struct tdes_keys {
u8  key3[DES_KEY_SIZE];
 };
 
-static enum cc_hw_crypto_key hw_key_to_cc_hw_key(int slot_num)
+static enum cc_hw_crypto_key cc_slot_to_hw_key(int slot_num)
 {
switch (slot_num) {
case 0:
@@ -226,69 +234,98 @@ static enum cc_hw_crypto_key