[PATCH v3] crypto: ecc: Remove stack VLA usage
On the quest to remove all VLAs from the kernel[1], this avoids VLAs by just using the maximum allocation size (4 bytes) for stack arrays. All the VLAs in ecc were either 3 or 4 bytes (or a multiple), so just make it 4 bytes all the time. Initialization routines are adjusted to check that ndigits does not end up larger than the arrays. [1] https://lkml.org/lkml/2018/3/7/621 Signed-off-by: Kees Cook--- This expects 14de52112ee70ca289fa77bf2d9cbc79fd2c811f to be reverted. --- crypto/ecc.c | 47 --- crypto/ecc.h | 4 +++- crypto/ecdh.c | 4 ++-- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/crypto/ecc.c b/crypto/ecc.c index 18f32f2a5e1c..815541309a95 100644 --- a/crypto/ecc.c +++ b/crypto/ecc.c @@ -515,7 +515,7 @@ static void vli_mmod_fast_256(u64 *result, const u64 *product, static bool vli_mmod_fast(u64 *result, u64 *product, const u64 *curve_prime, unsigned int ndigits) { - u64 tmp[2 * ndigits]; + u64 tmp[2 * ECC_MAX_DIGITS]; switch (ndigits) { case 3: @@ -536,7 +536,7 @@ static bool vli_mmod_fast(u64 *result, u64 *product, static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right, const u64 *curve_prime, unsigned int ndigits) { - u64 product[2 * ndigits]; + u64 product[2 * ECC_MAX_DIGITS]; vli_mult(product, left, right, ndigits); vli_mmod_fast(result, product, curve_prime, ndigits); @@ -546,7 +546,7 @@ static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right, static void vli_mod_square_fast(u64 *result, const u64 *left, const u64 *curve_prime, unsigned int ndigits) { - u64 product[2 * ndigits]; + u64 product[2 * ECC_MAX_DIGITS]; vli_square(product, left, ndigits); vli_mmod_fast(result, product, curve_prime, ndigits); @@ -560,8 +560,8 @@ static void vli_mod_square_fast(u64 *result, const u64 *left, static void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod, unsigned int ndigits) { - u64 a[ndigits], b[ndigits]; - u64 u[ndigits], v[ndigits]; + u64 a[ECC_MAX_DIGITS], b[ECC_MAX_DIGITS]; + u64 
u[ECC_MAX_DIGITS], v[ECC_MAX_DIGITS]; u64 carry; int cmp_result; @@ -649,8 +649,8 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, u64 *curve_prime, unsigned int ndigits) { /* t1 = x, t2 = y, t3 = z */ - u64 t4[ndigits]; - u64 t5[ndigits]; + u64 t4[ECC_MAX_DIGITS]; + u64 t5[ECC_MAX_DIGITS]; if (vli_is_zero(z1, ndigits)) return; @@ -711,7 +711,7 @@ static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1, static void apply_z(u64 *x1, u64 *y1, u64 *z, u64 *curve_prime, unsigned int ndigits) { - u64 t1[ndigits]; + u64 t1[ECC_MAX_DIGITS]; vli_mod_square_fast(t1, z, curve_prime, ndigits);/* z^2 */ vli_mod_mult_fast(x1, x1, t1, curve_prime, ndigits); /* x1 * z^2 */ @@ -724,7 +724,7 @@ static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *p_initial_z, u64 *curve_prime, unsigned int ndigits) { - u64 z[ndigits]; + u64 z[ECC_MAX_DIGITS]; vli_set(x2, x1, ndigits); vli_set(y2, y1, ndigits); @@ -750,7 +750,7 @@ static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, unsigned int ndigits) { /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ - u64 t5[ndigits]; + u64 t5[ECC_MAX_DIGITS]; /* t5 = x2 - x1 */ vli_mod_sub(t5, x2, x1, curve_prime, ndigits); @@ -791,9 +791,9 @@ static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2, u64 *curve_prime, unsigned int ndigits) { /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ - u64 t5[ndigits]; - u64 t6[ndigits]; - u64 t7[ndigits]; + u64 t5[ECC_MAX_DIGITS]; + u64 t6[ECC_MAX_DIGITS]; + u64 t7[ECC_MAX_DIGITS]; /* t5 = x2 - x1 */ vli_mod_sub(t5, x2, x1, curve_prime, ndigits); @@ -846,9 +846,9 @@ static void ecc_point_mult(struct ecc_point *result, unsigned int ndigits) { /* R0 and R1 */ - u64 rx[2][ndigits]; - u64 ry[2][ndigits]; - u64 z[ndigits]; + u64 rx[2][ECC_MAX_DIGITS]; + u64 ry[2][ECC_MAX_DIGITS]; + u64 z[ECC_MAX_DIGITS]; int i, nb; int num_bits = vli_num_bits(scalar, ndigits); @@ -943,13 +943,13 @@ int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits, int 
ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits, u64 *privkey) { const struct ecc_curve *curve = ecc_get_curve(curve_id); - u64 priv[ndigits]; + u64 priv[ECC_MAX_DIGITS];
Re: [PATCH v2] crypto/ecc: Remove stack VLA usage
On Fri, Mar 16, 2018 at 8:56 AM, Herbert Xu wrote: > On Thu, Mar 08, 2018 at 01:57:02PM -0800, Kees Cook wrote: >> On the quest to remove all VLAs from the kernel[1], this switches to >> a pair of kmalloc regions instead of using the stack. This also moves >> the get_random_bytes() after all allocations (and drops the needless >> "nbytes" variable). >> >> [1] https://lkml.org/lkml/2018/3/7/621 >> >> Signed-off-by: Kees Cook > > Patch applied. Thanks. Hi, sorry for the noise on this one: I messed up looking at the ecc code (I confused myself into thinking there was only a single instance of the problem). The applied patch is both incomplete and inefficient. I have a much simpler solution, and I'll send that with a revert... -Kees -- Kees Cook Pixel Security
Re: [PATCH v13 01/10] iomap: Use correct endian conversion function in mmio_writeXXbe
On 26/03/18 01:50 PM, Arnd Bergmann wrote: > I wouldn't expect it to matter: the byte swap is almost always much > cheaper compared to the actual bus access for the MMIO, and I > would also guess that modern compilers can eliminate the double > byte swap on architectures where writel() is an inline function. Most of > the important architectures use ARCH_USE_BUILTIN_BSWAP, which > guarantees that. Fair enough. Sometime this week I'll update my patch set to change that. Thanks, Logan
Re: [PATCH v13 01/10] iomap: Use correct endian conversion function in mmio_writeXXbe
On Mon, Mar 26, 2018 at 6:21 PM, Logan Gunthorpe wrote: > > > On 26/03/18 04:53 AM, Arnd Bergmann wrote: >> On most architectures, this is not important: >> - For x86, the stores are aways atomic and no additional barriers >> are needed, so the two are the same >> - For ARM (both 32 and 64-bit), powerpc and many others, we don't >> use the generic iowrite() and just fall back to writel() or >> writel(swab32()). >> >> However, shouldn't we just use the writel(swab32()) logic here as well >> for the common case rather than risking missing barriers? > > Hmm, I don't know... it's complicated? > > Doing a bit of digging shows that the existing code was written during a > time when writel() did not include extra barriers over __raw_writel() in > any of the common arches. > > The commit logs don't seem to provide any guidance as to why this it was > done this way, but I'd assume it was done to avoid a double swab() call > on BE arches. Seeing writel() is typically implemented as: > > __raw_writel(__cpu_to_le32(value), addr); > > Then on BE arches, writel(swab32()) would become: > > __raw_writel(swab32(swab32(value)), addr) > > Which seems undesirable. I wouldn't expect it to matter: the byte swap is almost always much cheaper compared to the actual bus access for the MMIO, and I would also guess that modern compilers can eliminate the double byte swap on architectures where writel() is an inline function. Most of the important architectures use ARCH_USE_BUILTIN_BSWAP, which guarantees that. Arnd
Re: [PATCH v13 01/10] iomap: Use correct endian conversion function in mmio_writeXXbe
On 26/03/18 04:53 AM, Arnd Bergmann wrote: > On most architectures, this is not important: > - For x86, the stores are aways atomic and no additional barriers > are needed, so the two are the same > - For ARM (both 32 and 64-bit), powerpc and many others, we don't > use the generic iowrite() and just fall back to writel() or > writel(swab32()). > > However, shouldn't we just use the writel(swab32()) logic here as well > for the common case rather than risking missing barriers? Hmm, I don't know... it's complicated? Doing a bit of digging shows that the existing code was written during a time when writel() did not include extra barriers over __raw_writel() in any of the common arches. The commit logs don't seem to provide any guidance as to why this it was done this way, but I'd assume it was done to avoid a double swab() call on BE arches. Seeing writel() is typically implemented as: __raw_writel(__cpu_to_le32(value), addr); Then on BE arches, writel(swab32()) would become: __raw_writel(swab32(swab32(value)), addr) Which seems undesirable. Logan
Re: in-kernel user of ecdsa
On 03/12/2018 07:07 PM, Tudor Ambarus wrote: Would you consider using ECDSA in the kernel module signing facility? Any feedback is good. I can invest some time to make this happen, if needed. When compared with RSA, ECDSA has shorter keys, the key generation process is faster, the sign operation is faster, but the verify operation is slower than with RSA. Smaller key sizes imply reduced memory footprint and bandwidth that are especially attractive for memory constrained devices. I'm working with such a device, capable of generating ecc keys, secure key storage and ecdsa/ecdh crypto acceleration. I'm trying to find an in-kernel user of ecdsa. ECDSA and RSA comparison -> ECDSA requires a much smaller key length in order to provide the same security strength as RSA [1]: Security Strength | RSA (bits) | ECDSA (bits): 112 | 2048 | 224 - 255; 128 | 3072 | 256 - 383; 192 | 7680 | 384 - 511; 256 | 15360 | 512+. 7680 and 15360 keys are not included in the NIST standards for interoperability and efficiency reasons, the keys are just too big. -> key generation: ECC key generation is faster than IFC (Integer Factorization Cryptography). RSA private key is based on large prime numbers, while for ECDSA any positive integer less than n is a valid private key. -> ECDSA sign operations are faster than RSA, but verify operations are slower. Here's an openssl speed test that I've run on my computer: sign | verify | sign/s | verify/s: rsa 2048 bits 0.000604s 0.000018s 1656.3 56813.7; rsa 4096 bits 0.004027s 0.000062s 248.3 16052.5. sign | verify | sign/s | verify/s: 256 bit ecdsa (nistp256) 0.0000s 0.0001s 28986.4 13516.3; 384 bit ecdsa (nistp384) 0.0002s 0.0008s 5541.0 1322.2; 521 bit ecdsa (nistp521) 0.0003s 0.0006s 3104.2 1756.2. Best, ta [1] NIST SP 800-57 Pt. 1 Rev. 4, Recommendation for key management -- To unsubscribe from this list: send the line "unsubscribe keyrings" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v6 04/12] ima: Introduce is_ima_sig()
On Fri, 2018-03-16 at 17:38 -0300, Thiago Jung Bauermann wrote: > With the introduction of another IMA signature type (modsig), some places > will need to check for both of them. It is cleaner to do that if there's a > helper function to tell whether an xattr_value represents an IMA > signature. Initially the function name "is_ima_sig" is fine, since it reflects the 'imasig' type. Having a more generic function name would be better when adding 'modsig' support. As long as the function is locally define, we can drop 'ima' from the name. Perhaps something like has_signature or is_signed() would be preferable. Mimi > > Suggested-by: Mimi Zohar> Signed-off-by: Thiago Jung Bauermann > --- > security/integrity/ima/ima.h | 5 + > security/integrity/ima/ima_appraise.c | 7 +++ > security/integrity/ima/ima_template_lib.c | 2 +- > 3 files changed, 9 insertions(+), 5 deletions(-) > > diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h > index 35fe91aa1fc9..4bafa6a97967 100644 > --- a/security/integrity/ima/ima.h > +++ b/security/integrity/ima/ima.h > @@ -155,6 +155,11 @@ unsigned long ima_get_binary_runtime_size(void); > int ima_init_template(void); > void ima_init_template_list(void); > > +static inline bool is_ima_sig(const struct evm_ima_xattr_data *xattr_value) > +{ > + return xattr_value && xattr_value->type == EVM_IMA_XATTR_DIGSIG; > +} > + > /* > * used to protect h_table and sha_table > */ > diff --git a/security/integrity/ima/ima_appraise.c > b/security/integrity/ima/ima_appraise.c > index a6b2995b7d0b..01172eab297b 100644 > --- a/security/integrity/ima/ima_appraise.c > +++ b/security/integrity/ima/ima_appraise.c > @@ -325,15 +325,14 @@ int ima_appraise_measurement(enum ima_hooks func, > } else if (status != INTEGRITY_PASS) { > /* Fix mode, but don't replace file signatures. 
*/ > if ((ima_appraise & IMA_APPRAISE_FIX) && > - (!xattr_value || > - xattr_value->type != EVM_IMA_XATTR_DIGSIG)) { > + !is_ima_sig(xattr_value)) { > if (!ima_fix_xattr(dentry, iint)) > status = INTEGRITY_PASS; > } > > /* Permit new files with file signatures, but without data. */ > if (inode->i_size == 0 && iint->flags & IMA_NEW_FILE && > - xattr_value && xattr_value->type == EVM_IMA_XATTR_DIGSIG) { > + is_ima_sig(xattr_value)) { > status = INTEGRITY_PASS; > } > > @@ -448,7 +447,7 @@ int ima_inode_setxattr(struct dentry *dentry, const char > *xattr_name, > if (!xattr_value_len || (xvalue->type >= IMA_XATTR_LAST)) > return -EINVAL; > ima_reset_appraise_flags(d_backing_inode(dentry), > - xvalue->type == EVM_IMA_XATTR_DIGSIG); > + is_ima_sig(xvalue)); > result = 0; > } > return result; > diff --git a/security/integrity/ima/ima_template_lib.c > b/security/integrity/ima/ima_template_lib.c > index 5afaa53decc5..afb52a90e532 100644 > --- a/security/integrity/ima/ima_template_lib.c > +++ b/security/integrity/ima/ima_template_lib.c > @@ -380,7 +380,7 @@ int ima_eventsig_init(struct ima_event_data *event_data, > { > struct evm_ima_xattr_data *xattr_value = event_data->xattr_value; > > - if ((!xattr_value) || (xattr_value->type != EVM_IMA_XATTR_DIGSIG)) > + if (!is_ima_sig(xattr_value)) > return 0; > > return ima_write_template_field_data(xattr_value, event_data->xattr_len, >
Re: [PATCH v6 11/12] ima: Implement support for module-style appended signatures
On Fri, 2018-03-16 at 17:38 -0300, Thiago Jung Bauermann wrote: > This patch actually implements the appraise_type=imasig|modsig option, > allowing IMA to read and verify modsig signatures. > > In case both are present in the same file, IMA will first check whether the > key used by the xattr signature is present in the kernel keyring. If not, > it will try the appended signature. Yes, this sounds right. > > Signed-off-by: Thiago Jung Bauermann> --- > security/integrity/ima/ima.h | 11 +++- > security/integrity/ima/ima_appraise.c | 53 > +++ > security/integrity/ima/ima_main.c | 21 +++--- > 3 files changed, 74 insertions(+), 11 deletions(-) > > diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h > index 49aef56dc96d..c11ccb7c5bfb 100644 > --- a/security/integrity/ima/ima.h > +++ b/security/integrity/ima/ima.h > @@ -157,7 +157,8 @@ void ima_init_template_list(void); > > static inline bool is_ima_sig(const struct evm_ima_xattr_data *xattr_value) > { > - return xattr_value && xattr_value->type == EVM_IMA_XATTR_DIGSIG; > + return xattr_value && (xattr_value->type == EVM_IMA_XATTR_DIGSIG || > +xattr_value->type == IMA_MODSIG); > } > > /* > @@ -253,6 +254,8 @@ enum integrity_status ima_get_cache_status(struct > integrity_iint_cache *iint, > enum ima_hooks func); > enum hash_algo ima_get_hash_algo(struct evm_ima_xattr_data *xattr_value, >int xattr_len); > +bool ima_xattr_sig_known_key(const struct evm_ima_xattr_data *xattr_value, > + int xattr_len); > int ima_read_xattr(struct dentry *dentry, > struct evm_ima_xattr_data **xattr_value); > > @@ -291,6 +294,12 @@ ima_get_hash_algo(struct evm_ima_xattr_data > *xattr_value, int xattr_len) > return ima_hash_algo; > } > > +static inline bool ima_xattr_sig_known_key(const struct evm_ima_xattr_data > +*xattr_value, int xattr_len) > +{ > + return false; > +} > + > static inline int ima_read_xattr(struct dentry *dentry, >struct evm_ima_xattr_data **xattr_value) > { > diff --git 
a/security/integrity/ima/ima_appraise.c > b/security/integrity/ima/ima_appraise.c > index 01172eab297b..84e0fd5a19c8 100644 > --- a/security/integrity/ima/ima_appraise.c > +++ b/security/integrity/ima/ima_appraise.c > @@ -189,6 +189,22 @@ enum hash_algo ima_get_hash_algo(struct > evm_ima_xattr_data *xattr_value, > return ima_hash_algo; > } > > +bool ima_xattr_sig_known_key(const struct evm_ima_xattr_data *xattr_value, > + int xattr_len) > +{ > + struct key *keyring; > + > + if (xattr_value->type != EVM_IMA_XATTR_DIGSIG) > + return false; > + > + keyring = integrity_keyring_from_id(INTEGRITY_KEYRING_IMA); > + if (IS_ERR(keyring)) > + return false; > + > + return asymmetric_sig_has_known_key(keyring, (const char *) xattr_value, > + xattr_len); > +} > + > int ima_read_xattr(struct dentry *dentry, > struct evm_ima_xattr_data **xattr_value) > { > @@ -221,8 +237,12 @@ int ima_appraise_measurement(enum ima_hooks func, > struct inode *inode = d_backing_inode(dentry); > enum integrity_status status = INTEGRITY_UNKNOWN; > int rc = xattr_len, hash_start = 0; > + size_t xattr_contents_len; > + void *xattr_contents; > > - if (!(inode->i_opflags & IOP_XATTR)) > + /* If not appraising a modsig, we need an xattr. */ > + if ((xattr_value == NULL || xattr_value->type != IMA_MODSIG) && > + !(inode->i_opflags & IOP_XATTR)) > return INTEGRITY_UNKNOWN; > > if (rc <= 0) { > @@ -241,13 +261,29 @@ int ima_appraise_measurement(enum ima_hooks func, > goto out; > } > > - status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint); > + /* > + * If it's a modsig, we don't have the xattr contents to pass to > + * evm_verifyxattr(). 
> + */ > + if (xattr_value->type == IMA_MODSIG) { > + xattr_contents = NULL; > + xattr_contents_len = 0; > + } else { > + xattr_contents = xattr_value; > + xattr_contents_len = xattr_len; > + } > + > + status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_contents, > + xattr_contents_len, iint); > switch (status) { > case INTEGRITY_PASS: > case INTEGRITY_PASS_IMMUTABLE: > case INTEGRITY_UNKNOWN: > break; > case INTEGRITY_NOXATTRS:/* No EVM protected xattrs. */ > + /* It's fine not to have xattrs when using a modsig. */ > + if (xattr_value->type == IMA_MODSIG) > + break; > case INTEGRITY_NOLABEL:
Re: [PATCH v6 12/12] ima: Write modsig to the measurement list
On Fri, 2018-03-16 at 17:38 -0300, Thiago Jung Bauermann wrote: > Define new "d-sig" template field which holds the digest that is expected > to match the one contained in the modsig. > > Also add modsig support to the "sig" template field, allowing the the > contents of the modsig to be included in the measurement list. Although including the appended signature in the template data doesn't make sense on its own, as the file digest (without the appended signature) is needed to validate the appended signature, defining a new template field and its usage should be independent of other changes. Mimi > > Suggested-by: Mimi Zohar> Signed-off-by: Thiago Jung Bauermann > --- > Documentation/security/IMA-templates.rst | 5 > security/integrity/ima/ima_template.c | 4 ++- > security/integrity/ima/ima_template_lib.c | 47 > +-- > security/integrity/ima/ima_template_lib.h | 2 ++ > 4 files changed, 55 insertions(+), 3 deletions(-) > > diff --git a/Documentation/security/IMA-templates.rst > b/Documentation/security/IMA-templates.rst > index 2cd0e273cc9a..f2a0f4225857 100644 > --- a/Documentation/security/IMA-templates.rst > +++ b/Documentation/security/IMA-templates.rst > @@ -68,6 +68,11 @@ descriptors by adding their identifier to the format string > - 'd-ng': the digest of the event, calculated with an arbitrary hash > algorithm (field format: [:]digest, where the digest > prefix is shown only if the hash algorithm is not SHA1 or MD5); > + - 'd-sig': the digest of the event for files that have an appended modsig. > This > + field is calculated without including the modsig and thus will differ from > + the total digest of the file, but it is what should match the digest > + contained in the modsig (if it doesn't, the signature is invalid). It is > + shown in the same format as 'd-ng'; > - 'n-ng': the name of the event, without size limitations; > - 'sig': the file signature. 
> > diff --git a/security/integrity/ima/ima_template.c > b/security/integrity/ima/ima_template.c > index 30db39b23804..36fc32f538b5 100644 > --- a/security/integrity/ima/ima_template.c > +++ b/security/integrity/ima/ima_template.c > @@ -43,8 +43,10 @@ static struct ima_template_field supported_fields[] = { >.field_show = ima_show_template_string}, > {.field_id = "sig", .field_init = ima_eventsig_init, >.field_show = ima_show_template_sig}, > + {.field_id = "d-sig", .field_init = ima_eventdigest_sig_init, > + .field_show = ima_show_template_digest_ng}, > }; > -#define MAX_TEMPLATE_NAME_LEN 15 > +#define MAX_TEMPLATE_NAME_LEN 24 > > static struct ima_template_desc *ima_template; > static struct ima_template_desc *lookup_template_desc(const char *name); > diff --git a/security/integrity/ima/ima_template_lib.c > b/security/integrity/ima/ima_template_lib.c > index afb52a90e532..1dca082cce43 100644 > --- a/security/integrity/ima/ima_template_lib.c > +++ b/security/integrity/ima/ima_template_lib.c > @@ -220,7 +220,8 @@ int ima_parse_buf(void *bufstartp, void *bufendp, void > **bufcurp, > return 0; > } > > -static int ima_eventdigest_init_common(u8 *digest, u32 digestsize, u8 > hash_algo, > +static int ima_eventdigest_init_common(const u8 *digest, u32 digestsize, > +u8 hash_algo, > struct ima_field_data *field_data) > { > /* > @@ -323,6 +324,35 @@ int ima_eventdigest_ng_init(struct ima_event_data > *event_data, > hash_algo, field_data); > } > > +/* > + * This function writes the digest of the file which is expected to match the > + * digest contained in the file's embedded signature. 
> + */ > +int ima_eventdigest_sig_init(struct ima_event_data *event_data, > + struct ima_field_data *field_data) > +{ > + struct evm_ima_xattr_data *xattr_value = event_data->xattr_value; > + enum hash_algo hash_algo = HASH_ALGO_SHA1; > + const u8 *cur_digest = NULL; > + u8 cur_digestsize = 0; > + int ret; > + > + if (!xattr_value || xattr_value->type != IMA_MODSIG) > + return 0; > + > + if (event_data->violation) /* recording a violation. */ > + goto out; > + > + ret = ima_get_modsig_hash(xattr_value, _algo, _digest, > + _digestsize); > + if (ret) > + return ret; > + > + out: > + return ima_eventdigest_init_common(cur_digest, cur_digestsize, > +hash_algo, field_data); > +} > + > static int ima_eventname_init_common(struct ima_event_data *event_data, >struct ima_field_data *field_data, >bool size_limit) > @@ -379,10 +409,23 @@ int ima_eventsig_init(struct ima_event_data *event_data, > struct ima_field_data
[PATCH] crypto: rsa - remove unneeded initializations
Remove useless assignment of ret to -ENOMEM in rsa_verify. Remove useless initialization of ret to zero at declaration in rsa_enc/dec/sign/verify. Benefit of the power of undefined values and set ret in branches in rsa_enc/dec/sign. Reported-by: Benjamin BalesSigned-off-by: Tudor Ambarus --- crypto/rsa.c | 24 +--- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/crypto/rsa.c b/crypto/rsa.c index b067f3a..e75ce09 100644 --- a/crypto/rsa.c +++ b/crypto/rsa.c @@ -88,7 +88,7 @@ static int rsa_enc(struct akcipher_request *req) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); const struct rsa_mpi_key *pkey = rsa_get_key(tfm); MPI m, c = mpi_alloc(0); - int ret = 0; + int ret; int sign; if (!c) @@ -99,10 +99,11 @@ static int rsa_enc(struct akcipher_request *req) goto err_free_c; } - ret = -ENOMEM; m = mpi_read_raw_from_sgl(req->src, req->src_len); - if (!m) + if (!m) { + ret = -ENOMEM; goto err_free_c; + } ret = _rsa_enc(pkey, c, m); if (ret) @@ -127,7 +128,7 @@ static int rsa_dec(struct akcipher_request *req) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); const struct rsa_mpi_key *pkey = rsa_get_key(tfm); MPI c, m = mpi_alloc(0); - int ret = 0; + int ret; int sign; if (!m) @@ -138,10 +139,11 @@ static int rsa_dec(struct akcipher_request *req) goto err_free_m; } - ret = -ENOMEM; c = mpi_read_raw_from_sgl(req->src, req->src_len); - if (!c) + if (!c) { + ret = -ENOMEM; goto err_free_m; + } ret = _rsa_dec(pkey, m, c); if (ret) @@ -165,7 +167,7 @@ static int rsa_sign(struct akcipher_request *req) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); const struct rsa_mpi_key *pkey = rsa_get_key(tfm); MPI m, s = mpi_alloc(0); - int ret = 0; + int ret; int sign; if (!s) @@ -176,10 +178,11 @@ static int rsa_sign(struct akcipher_request *req) goto err_free_s; } - ret = -ENOMEM; m = mpi_read_raw_from_sgl(req->src, req->src_len); - if (!m) + if (!m) { + ret = -ENOMEM; goto err_free_s; + } ret = _rsa_sign(pkey, s, m); if (ret) @@ -204,7 +207,7 
@@ static int rsa_verify(struct akcipher_request *req) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); const struct rsa_mpi_key *pkey = rsa_get_key(tfm); MPI s, m = mpi_alloc(0); - int ret = 0; + int ret; int sign; if (!m) @@ -215,7 +218,6 @@ static int rsa_verify(struct akcipher_request *req) goto err_free_m; } - ret = -ENOMEM; s = mpi_read_raw_from_sgl(req->src, req->src_len); if (!s) { ret = -ENOMEM; -- 2.9.4
Re: [PATCH v13 01/10] iomap: Use correct endian conversion function in mmio_writeXXbe
On Wed, Mar 21, 2018 at 5:37 PM, Logan Gunthorpewrote: > The semantics of the iowriteXXbe() functions are to write a > value in CPU endianess to an IO register that is known by the > caller to be in Big Endian. The mmio_writeXXbe() macro, which > is called by iowriteXXbe(), should therefore use cpu_to_beXX() > instead of beXX_to_cpu(). > > Seeing both beXX_to_cpu() and cpu_to_beXX() are both functionally > implemented as either null operations or swabXX operations there > was no noticable bug here. But it is confusing for both developers > and code analysis tools alike. > > Signed-off-by: Logan Gunthorpe Your patch is a clear improvement of what we had before, but I notice that we have a weird asymmetry between big-endian and little-endian accessors before and after this patch: void iowrite32(u32 val, void __iomem *addr) { IO_COND(addr, outl(val,port), writel(val, addr)); } void iowrite32be(u32 val, void __iomem *addr) { IO_COND(addr, pio_write32be(val,port), mmio_write32be(val, addr)); } The little-endian iowrite32() when applied to mmio registers uses a 32-bit wide atomic store to a little-endian register with barriers to order against both spinlocks and DMA. The big-endian iowrite32be() on the same pointer uses a nonatomic store with no barriers whatsoever and the opposite endianess. On most architectures, this is not important: - For x86, the stores are aways atomic and no additional barriers are needed, so the two are the same - For ARM (both 32 and 64-bit), powerpc and many others, we don't use the generic iowrite() and just fall back to writel() or writel(swab32()). However, shouldn't we just use the writel(swab32()) logic here as well for the common case rather than risking missing barriers? Arnd
Re: [PATCH v2 3/9] crypto: caam - don't leak pointers to authenc keys
On 3/23/2018 12:42 PM, Tudor Ambarus wrote: > In caam's aead_setkey we save pointers to the authenc keys in a > local variable of type struct crypto_authenc_keys and we don't > zeroize it after use. Fix this and don't leak pointers to the > authenc keys. > > Signed-off-by: Tudor Ambarus Reviewed-by: Horia Geantă Thanks, Horia
Re: [PATCH v2 4/9] crypto: caam/qi - don't leak pointers to authenc keys
On 3/23/2018 12:42 PM, Tudor Ambarus wrote: > In caam/qi's aead_setkey we save pointers to the authenc keys in > a local variable of type struct crypto_authenc_keys and we don't > zeroize it after use. Fix this and don't leak pointers to the > authenc keys. > > Signed-off-by: Tudor Ambarus Reviewed-by: Horia Geantă Thanks, Horia
Re: [PATCH v6 0/5] add compression algorithm zBeWalgo
Hi Benjamin, Thanks for the nice present and good testing! I hope to grab a chance to test this shiny new algorithm but is busy this week. Hopefully, I will get that soon and feedback to you asap. Thanks. On Mon, Mar 26, 2018 at 10:31:40AM +0200, Benjamin Warnke wrote: > This patch series adds a new compression algorithm to the kernel and to > the crypto api. > > Changes since v5: > - Fixed compile-error due to variable definitions inside #ifdef > CONFIG_ZRAM_WRITEBACK > > Changes since v4: > - Fix mismatching function-prototypes > - Fix mismatching License errors > - Add static to global vars > - Add ULL to long constants > > Changes since v3: > - Split patch into patchset > - Add Zstd = Zstandard to the list of benchmarked algorithms > - Added configurable compression levels to crypto-api > - Added multiple compression levels to the benchmarks below > - Added unsafe decompressor functions to crypto-api > - Added flag to mark unstable algorithms to crypto-api > - Test the code using afl-fuzz -> and fix the code > - Added 2 new Benchmark datasets > - checkpatch.pl fixes > > Changes since v2: > - added linux-kernel Mailinglist > > Changes since v1: > - improved documentation > - improved code style > - replaced numerous casts with get_unaligned* > - added tests in crypto/testmgr.h/c > - added zBeWalgo to the list of algorithms shown by > /sys/block/zram0/comp_algorithm > > > Currently ZRAM uses compression-algorithms from the crypto-api. ZRAM > compresses each page individually. As a result the compression algorithm is > forced to use a very small sliding window. None of the available compression > algorithms is designed to achieve high compression ratios with small inputs. > > This patch-set adds a new compression algorithm 'zBeWalgo' to the crypto api. > This algorithm focusses on increasing the capacity of the compressed > block-device created by ZRAM. The choice of compression algorithms is always > a tradeoff between speed and compression ratio. 
> > If faster algorithms like 'lz4' are chosen the compression ratio is often > lower than the ratio of zBeWalgo as shown in the following benchmarks. Due to > the lower compression ratio, ZRAM needs to fall back to backing_devices > mode often. If backing_devices are required, the effective speed of ZRAM is a > weighted average of de/compression time and writing/reading from the > backing_device. This should be considered when comparing the speeds in the > benchmarks. > > There are different kinds of backing_devices, each with its own drawbacks. > 1. HDDs: This kind of backing device is very slow. If the compression ratio > of an algorithm is much lower than the ratio of zBeWalgo, it might be faster > to use zBewalgo instead. > 2. SSDs: I tested a swap partition on my NVME-SSD. The speed is even higher > than zram with lz4, but after about 5 Minutes the SSD is blocking all > read/write requests due to overheating. This is definitly not an option. > > > Benchmarks: > > > To obtain reproducable benchmarks, the datasets were first loaded into a > userspace-program. Than the data is written directly to a clean > zram-partition without any filesystem. Between writing and reading 'sync' > and 'echo 3 > /proc/sys/vm/drop_caches' is called. All time measurements are > wall clock times, and the benchmarks are using only one cpu-core at a time. > The new algorithm is compared to all available compression algorithms from > the crypto-api. > > Before loading the datasets to user-space deduplication is applied, since > none Algorithm has deduplication. Duplicated pages are removed to > prevent an algorithm to obtain high/low ratios, just because a single page can > be compressed very well - or not. > > All Algorithms marked with '*' are using unsafe decompression. > > All Read and Write Speed Measurements are given in MBit/s > > zbewalgo' uses per dataset specialized different combinations. These can be > specified at runtime via /sys/kernel/zbewalgo/combinations. 
> > > - '/dev/zero' This dataset is used to measure the speed limitations > for ZRAM. ZRAM filters zero-data internally and does not even call the > specified compression algorithm. > > Algorithm writeread > --zram-- 2724.08 2828.87 > > > - 'ecoham' This dataset is one of the input files for the scientific > application ECOHAM which runs an ocean simulation. This dataset contains a > lot of zeros - even after deduplication. Where the data is not zero there are > arrays of floating point values, adjacent float values are likely to be > similar to each other, allowing for high compression ratios. > > zbewalgo reaches very high compression ratios and is a lot faster than other > algorithms with similar compression ratios. > > Algorithmratiowrite read > --hdd-- 1.00 134.70 156.62 > lz4*_10 6.73 1303.12 1547.17 > lz4_106.73 1303.12 1574.51 > lzo 6.88 1205.98 1468.09 > lz4*_05 7.00 1291.81 1642.41 > lz4_05
[PATCH v6 4/5] crypto: configurable compression level
Most compression algorithms published by the crypto api are supporting multiple different compression levels. The crypto api currently just calls these algorithms with their default compression level. This patch enables the caller to specify the compression level. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/api.c | 76 +++ crypto/deflate.c | 16 + crypto/lz4.c | 16 + crypto/lz4hc.c| 13 +--- crypto/testmgr.c | 2 +- drivers/block/zram/zcomp.c| 10 +++--- drivers/block/zram/zcomp.h| 3 +- drivers/block/zram/zram_drv.c | 24 -- drivers/block/zram/zram_drv.h | 1 + fs/ubifs/compress.c | 2 +- include/linux/crypto.h| 9 +++-- mm/zswap.c| 2 +- net/xfrm/xfrm_ipcomp.c| 3 +- 13 files changed, 146 insertions(+), 31 deletions(-) diff --git a/crypto/api.c b/crypto/api.c index 70a894e52..dadd4dede 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -384,6 +384,47 @@ struct crypto_tfm *__crypto_alloc_tfm(struct crypto_alg *alg, u32 type, } EXPORT_SYMBOL_GPL(__crypto_alloc_tfm); +struct crypto_tfm *__crypto_alloc_tfm_compress(struct crypto_alg *alg, + u32 type, u32 mask, int level) +{ + struct crypto_tfm *tfm = NULL; + unsigned int tfm_size; + int err = -ENOMEM; + + tfm_size = sizeof(*tfm) + crypto_ctxsize(alg, type, mask); + tfm = kzalloc(tfm_size, GFP_KERNEL); + if (!tfm) + goto out_err; + + tfm->__crt_alg = alg; + if (alg->cra_flags & CRYPTO_ALG_TYPE_COMPRESS) + tfm->crt_compress.cot_level = level; + + err = crypto_init_ops(tfm, type, mask); + if (err) + goto out_free_tfm; + + if (!tfm->exit && alg->cra_init) { + err = alg->cra_init(tfm); + if (err) + goto cra_init_failed; + } + + goto out; + +cra_init_failed: + crypto_exit_ops(tfm); +out_free_tfm: + if (err == -EAGAIN) + crypto_shoot_alg(alg); + kfree(tfm); +out_err: + tfm = ERR_PTR(err); +out: + return tfm; +} +EXPORT_SYMBOL_GPL(__crypto_alloc_tfm_compress); + /* * crypto_alloc_base - Locate algorithm and allocate transform * @alg_name: Name of algorithm @@ -440,6 +481,41 @@ struct crypto_tfm 
*crypto_alloc_base(const char *alg_name, u32 type, u32 mask) } EXPORT_SYMBOL_GPL(crypto_alloc_base); +struct crypto_tfm *crypto_alloc_base_compress(const char *alg_name, u32 type, + u32 mask, int level) +{ + struct crypto_tfm *tfm; + int err; + + for (;;) { + struct crypto_alg *alg; + + alg = crypto_alg_mod_lookup(alg_name, type, mask); + if (IS_ERR(alg)) { + err = PTR_ERR(alg); + goto err; + } + + tfm = __crypto_alloc_tfm_compress(alg, type, mask, level); + if (!IS_ERR(tfm)) + return tfm; + + crypto_mod_put(alg); + err = PTR_ERR(tfm); + +err: + if (err != -EAGAIN) + break; + if (fatal_signal_pending(current)) { + err = -EINTR; + break; + } + } + + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(crypto_alloc_base_compress); + void *crypto_create_tfm(struct crypto_alg *alg, const struct crypto_type *frontend) { diff --git a/crypto/deflate.c b/crypto/deflate.c index 4b681a37c..54a2ff21b 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -24,6 +24,7 @@ * it is not needed for IPCOMP and keeps the code simpler. It can be * implemented if someone wants it. */ + #include #include #include @@ -43,7 +44,7 @@ struct deflate_ctx { struct z_stream_s decomp_stream; }; -static int deflate_comp_init(struct deflate_ctx *ctx, int format) +static int deflate_comp_init(struct deflate_ctx *ctx, int format, int level) { int ret = 0; struct z_stream_s *stream = >comp_stream; @@ -55,9 +56,9 @@ static int deflate_comp_init(struct deflate_ctx *ctx, int format) goto out; } if (format) - ret = zlib_deflateInit(stream, 3); + ret = zlib_deflateInit(stream, level); else - ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED, + ret = zlib_deflateInit2(stream, level, Z_DEFLATED, -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL, Z_DEFAULT_STRATEGY); @@ -109,11 +110,11 @@ static void deflate_decomp_exit(struct deflate_ctx *ctx)
[PATCH v6 2/5] crypto: add zBeWalgo to crypto-api
This patch adds zBeWalgo to the crypto api so that zBeWalgo can be used by zram. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/Kconfig| 12 crypto/Makefile | 1 + crypto/testmgr.c | 10 +++ crypto/testmgr.h | 134 ++ crypto/zbewalgo.c | 164 ++ drivers/block/zram/zcomp.c| 3 + drivers/block/zram/zram_drv.h | 4 +- 7 files changed, 327 insertions(+), 1 deletion(-) create mode 100644 crypto/zbewalgo.c diff --git a/crypto/Kconfig b/crypto/Kconfig index b75264b09..3ac0d4ca7 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -1668,6 +1668,18 @@ config CRYPTO_LZ4 help This is the LZ4 algorithm. +config CRYPTO_ZBEWALGO + tristate "zBeWalgo compression algorithm" + select CRYPTO_ALGAPI + select CRYPTO_ACOMP2 + select ZBEWALGO_COMPRESS + help + This is the zBeWalgo compression algorithm. This algorithm + accepts only input sizes of at most one page at once. + To achieve high compression ratios zbewalgo can call multiple + transformation and compression algorithms in a row to optimize + the compressed size. 
+ config CRYPTO_LZ4HC tristate "LZ4HC compression algorithm" select CRYPTO_ALGAPI diff --git a/crypto/Makefile b/crypto/Makefile index cdbc03b35..2a42fb289 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -121,6 +121,7 @@ obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o obj-$(CONFIG_CRYPTO_LZO) += lzo.o obj-$(CONFIG_CRYPTO_LZ4) += lz4.o +obj-$(CONFIG_CRYPTO_ZBEWALGO) += zbewalgo.o obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o obj-$(CONFIG_CRYPTO_842) += 842.o obj-$(CONFIG_CRYPTO_RNG2) += rng.o diff --git a/crypto/testmgr.c b/crypto/testmgr.c index d5e23a142..294075476 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -3566,6 +3566,16 @@ static const struct alg_test_desc alg_test_descs[] = { .dec = __VECS(tf_xts_dec_tv_template) } } + }, { + .alg = "zbewalgo", + .test = alg_test_comp, + .fips_allowed = 1, + .suite = { + .comp = { + .comp = __VECS(zbewalgo_comp_tv_template), + .decomp = __VECS(zbewalgo_decomp_tv_template) + } + } }, { .alg = "zlib-deflate", .test = alg_test_comp, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 6044f6906..996d8321e 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -35133,6 +35133,140 @@ static const struct hash_testvec bfin_crc_tv_template[] = { }; +static const struct comp_testvec zbewalgo_comp_tv_template[] = { + { + .inlen = 512, + .outlen = 402, + .input = + "\x8a\x3a\xf3\xbe\x33\xf9\xab\x3d\xa1\x51\x9f\x7f\xad\xf6\xab\x3d" + "\xad\x29\x8f\x3c\x27\xf4\xab\x3d\x06\x19\xc3\xf5\xa0\xf1\xab\x3d" + "\xfb\x75\x3b\xab\x1a\xef\xab\x3d\xe3\x96\xf8\x5c\x94\xec\xab\x3d" + "\x13\xd2\xfa\x0a\x0e\xea\xab\x3d\xe0\x7d\x42\xb5\x87\xe7\xab\x3d" + "\xa1\xf0\xcf\x5b\x01\xe5\xab\x3d\xad\x80\xa3\xfe\x7a\xe2\xab\x3d" + "\x59\x84\xbd\x9d\xf4\xdf\xab\x3d\xff\x51\x1e\x39\x6e\xdd\xab\x3d" + "\xf5\x3f\xc6\xd0\xe7\xda\xab\x3d\x96\xa4\xb5\x64\x61\xd8\xab\x3d" + "\x3b\xd6\xec\xf4\xda\xd5\xab\x3d\x3b\x2b\x6c\x81\x54\xd3\xab\x3d" + 
"\xf2\xf9\x33\x0a\xce\xd0\xab\x3d\xbb\x98\x44\x8f\x47\xce\xab\x3d" + "\xed\x5d\x9e\x10\xc1\xcb\xab\x3d\xe7\x9f\x41\x8e\x3a\xc9\xab\x3d" + "\x07\xb5\x2e\x08\xb4\xc6\xab\x3d\xa9\xf3\x65\x7e\x2d\xc4\xab\x3d" + "\x28\xb2\xe7\xf0\xa6\xc1\xab\x3d\xe3\x46\xb4\x5f\x20\xbf\xab\x3d" + "\x38\x08\xcc\xca\x99\xbc\xab\x3d\x85\x4c\x2f\x32\x13\xba\xab\x3d" + "\x2a\x6a\xde\x95\x8c\xb7\xab\x3d\x85\xb7\xd9\xf5\x05\xb5\xab\x3d" + "\xf7\x8a\x21\x52\x7f\xb2\xab\x3d\xe2\x3a\xb6\xaa\xf8\xaf\xab\x3d" + "\xa5\x1d\x98\xff\x71\xad\xab\x3d\xa3\x89\xc7\x50\xeb\xaa\xab\x3d" + "\x3d\xd5\x44\x9e\x64\xa8\xab\x3d\xd6\x56\x10\xe8\xdd\xa5\xab\x3d" + "\xce\x64\x2a\x2e\x57\xa3\xab\x3d\x8d\x55\x93\x70\xd0\xa0\xab\x3d" + "\x76\x7f\x4b\xaf\x49\x9e\xab\x3d\xeb\x38\x53\xea\xc2\x9b\xab\x3d" + "\x53\xd8\xaa\x21\x3c\x99\xab\x3d\x13\xb4\x52\x55\xb5\x96\xab\x3d" +
[PATCH v6 3/5] crypto: add unsafe decompression to api
Up to Version 3 of this patch the decompressor of zbewalgo did not verify that there is no overflow in the output buffer. Now zbewalgo includes a safe decompressor which does check for buffer overflows and heap-error. ZBewalgo and other Algorithms like lz4 include an unsafe decompressor version, which is a bit faster, but does no error checking. These unsafe decompressors can be applied when the datasource and the whole datapath is trusted. This patch publishes these existing functions in the crypto-api Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/842.c | 3 ++- crypto/compress.c| 10 ++ crypto/crypto_null.c | 3 ++- crypto/deflate.c | 3 ++- crypto/lz4.c | 23 ++- crypto/lz4hc.c | 23 ++- crypto/lzo.c | 3 ++- crypto/testmgr.c | 27 ++- crypto/zbewalgo.c| 29 - drivers/block/zram/zram_drv.c| 34 +- drivers/block/zram/zram_drv.h| 1 + drivers/crypto/cavium/zip/zip_main.c | 6 -- drivers/crypto/nx/nx-842-powernv.c | 3 ++- drivers/crypto/nx/nx-842-pseries.c | 3 ++- include/linux/crypto.h | 16 15 files changed, 174 insertions(+), 13 deletions(-) diff --git a/crypto/842.c b/crypto/842.c index bc26dc942..7e74ea26b 100644 --- a/crypto/842.c +++ b/crypto/842.c @@ -112,7 +112,8 @@ static struct crypto_alg alg = { .cra_exit = crypto842_exit, .cra_u = { .compress = { .coa_compress = crypto842_compress, - .coa_decompress = crypto842_decompress } } + .coa_decompress = crypto842_decompress, + .coa_decompress_unsafe = crypto842_decompress } } }; static struct scomp_alg scomp = { diff --git a/crypto/compress.c b/crypto/compress.c index f2d522924..bec796249 100644 --- a/crypto/compress.c +++ b/crypto/compress.c @@ -33,12 +33,22 @@ static int crypto_decompress(struct crypto_tfm *tfm, dlen); } +static int crypto_decompress_unsafe(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, +u8 *dst, unsigned int *dlen) +{ + return tfm->__crt_alg->cra_compress.coa_decompress_unsafe(tfm, src, + slen, dst, + dlen); +} + int crypto_init_compress_ops(struct 
crypto_tfm *tfm) { struct compress_tfm *ops = >crt_compress; ops->cot_compress = crypto_compress; ops->cot_decompress = crypto_decompress; + ops->cot_decompress_unsafe = crypto_decompress_unsafe; return 0; } diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index 20ff2c746..6e15e8c0b 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -146,7 +146,8 @@ static struct crypto_alg null_algs[3] = { { .cra_module = THIS_MODULE, .cra_u = { .compress = { .coa_compress = null_compress, - .coa_decompress = null_compress } } + .coa_decompress = null_compress, + .coa_decompress_unsafe = null_compress } } } }; MODULE_ALIAS_CRYPTO("compress_null"); diff --git a/crypto/deflate.c b/crypto/deflate.c index 94ec3b36a..4b681a37c 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -286,7 +286,8 @@ static struct crypto_alg alg = { .cra_exit = deflate_exit, .cra_u = { .compress = { .coa_compress = deflate_compress, - .coa_decompress = deflate_decompress } } + .coa_decompress = deflate_decompress, + .coa_decompress_unsafe = deflate_decompress } } }; static struct scomp_alg scomp[] = { { diff --git a/crypto/lz4.c b/crypto/lz4.c index 2ce2660d3..60a1914b7 100644 --- a/crypto/lz4.c +++ b/crypto/lz4.c @@ -103,6 +103,19 @@ static int __lz4_decompress_crypto(const u8 *src, unsigned int slen, return 0; } +static int __lz4_decompress_crypto_unsafe(const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen, + void *ctx) +{ + int out_len = LZ4_decompress_fast(src, dst, *dlen); + + if (out_len < 0) + return -EINVAL; + + *dlen = out_len; + return 0; +} + static int lz4_sdecompress(struct crypto_scomp *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen, void *ctx) @@ -117,6 +130,13 @@ static int lz4_decompress_crypto(struct
[PATCH v6 0/5] add compression algorithm zBeWalgo
This patch series adds a new compression algorithm to the kernel and to the crypto api. Changes since v5: - Fixed compile-error due to variable definitions inside #ifdef CONFIG_ZRAM_WRITEBACK Changes since v4: - Fix mismatching function-prototypes - Fix mismatching License errors - Add static to global vars - Add ULL to long constants Changes since v3: - Split patch into patchset - Add Zstd = Zstandard to the list of benchmarked algorithms - Added configurable compression levels to crypto-api - Added multiple compression levels to the benchmarks below - Added unsafe decompressor functions to crypto-api - Added flag to mark unstable algorithms to crypto-api - Test the code using afl-fuzz -> and fix the code - Added 2 new Benchmark datasets - checkpatch.pl fixes Changes since v2: - added linux-kernel Mailinglist Changes since v1: - improved documentation - improved code style - replaced numerous casts with get_unaligned* - added tests in crypto/testmgr.h/c - added zBeWalgo to the list of algorithms shown by /sys/block/zram0/comp_algorithm Currently ZRAM uses compression-algorithms from the crypto-api. ZRAM compresses each page individually. As a result the compression algorithm is forced to use a very small sliding window. None of the available compression algorithms is designed to achieve high compression ratios with small inputs. This patch-set adds a new compression algorithm 'zBeWalgo' to the crypto api. This algorithm focusses on increasing the capacity of the compressed block-device created by ZRAM. The choice of compression algorithms is always a tradeoff between speed and compression ratio. If faster algorithms like 'lz4' are chosen the compression ratio is often lower than the ratio of zBeWalgo as shown in the following benchmarks. Due to the lower compression ratio, ZRAM needs to fall back to backing_devices mode often. 
If backing_devices are required, the effective speed of ZRAM is a weighted average of de/compression time and writing/reading from the backing_device. This should be considered when comparing the speeds in the benchmarks. There are different kinds of backing_devices, each with its own drawbacks. 1. HDDs: This kind of backing device is very slow. If the compression ratio of an algorithm is much lower than the ratio of zBeWalgo, it might be faster to use zBeWalgo instead. 2. SSDs: I tested a swap partition on my NVME-SSD. The speed is even higher than zram with lz4, but after about 5 Minutes the SSD is blocking all read/write requests due to overheating. This is definitely not an option. Benchmarks: To obtain reproducible benchmarks, the datasets were first loaded into a userspace-program. Then the data is written directly to a clean zram-partition without any filesystem. Between writing and reading 'sync' and 'echo 3 > /proc/sys/vm/drop_caches' is called. All time measurements are wall clock times, and the benchmarks are using only one cpu-core at a time. The new algorithm is compared to all available compression algorithms from the crypto-api. Before loading the datasets to user-space deduplication is applied, since no algorithm has deduplication. Duplicated pages are removed to prevent an algorithm from obtaining high/low ratios, just because a single page can be compressed very well - or not. All Algorithms marked with '*' are using unsafe decompression. All Read and Write Speed Measurements are given in MBit/s zbewalgo' uses per dataset specialized different combinations. These can be specified at runtime via /sys/kernel/zbewalgo/combinations. - '/dev/zero' This dataset is used to measure the speed limitations for ZRAM. ZRAM filters zero-data internally and does not even call the specified compression algorithm. 
Algorithm writeread --zram-- 2724.08 2828.87 - 'ecoham' This dataset is one of the input files for the scientific application ECOHAM which runs an ocean simulation. This dataset contains a lot of zeros - even after deduplication. Where the data is not zero there are arrays of floating point values, adjacent float values are likely to be similar to each other, allowing for high compression ratios. zbewalgo reaches very high compression ratios and is a lot faster than other algorithms with similar compression ratios. Algorithmratiowrite read --hdd-- 1.00 134.70 156.62 lz4*_10 6.73 1303.12 1547.17 lz4_106.73 1303.12 1574.51 lzo 6.88 1205.98 1468.09 lz4*_05 7.00 1291.81 1642.41 lz4_057.00 1291.81 1682.81 lz4_077.13 1250.29 1593.89 lz4*_07 7.13 1250.29 1677.08 lz4_067.16 1307.62 1666.66 lz4*_06 7.16 1307.62 1669.42 lz4_037.21 1250.87 1449.48 lz4*_03 7.21 1250.87 1621.97 lz4*_04 7.23 1281.62 1645.56 lz4_047.23 1281.62 1666.81 lz4_027.33 1267.54 1523.11 lz4*_02 7.33 1267.54 1576.54 lz4_097.36 1140.55 1510.01 lz4*_09 7.36 1140.55 1692.38 lz4*_01 7.36 1215.40
[PATCH v6 1/5] add compression algorithm zBeWalgo
zBeWalgo is a completely new algorithm - Currently it is not published anywhere else right now, googling it would not show up any results. The following section describes how the algorithm works. zBeWalgo itself is a container compression algorithm, which can execute multiple different compression and transformation algorithms after each other. The execution of different compression algorithms after each other will be called 'combination' in this description and in the code. Additionally, to be able to execute combinations of algorithms, zBeWalgo can try different combinations on the same input. This allows high compression ratios on completely different datasets, which would otherwise require its own algorithm each. Executing all known combinations on each input page would be very slow. Therefore the data is compressed at first with that combination, which was already successful on the last input page. If the compressed data size of the current page is similar to that of the last page, the compressed data is returned immediately without even trying the other combinations. Even if there is no guarantee that consecutive calls to the algorithm belong to each other, the speed improvement is obvious. ZRAM uses zsmalloc for the management of the compressed pages. The largest size-class in zsmalloc is 3264 Bytes. If the compressed data is larger than that threshold, ZRAM ignores the compression and writes the uncompressed page instead. As a consequence it is useless to continue compression if the algorithm detects that the data can not be compressed using the current combination. The threshold for aborting compression can be changed via sysfs at any time, even if the algorithm is currently in use. If a combination fails to compress the data, zBeWalgo tries the next combination. If no combination is able to reduce the data in size, zBeWalgo returns a negative value. Each combination consists of up to 7 compression and transformation steps. 
Combinations can be added and removed at any time via sysfs. Already compressed Data can always be decompressed, even if the combination used to produce it does not exist anymore. Technically the user could add up to 256 combinations concurrently, but that would be very time consuming if the data can not be compressed. To be able to build combinations and call different algorithms, all those algorithms are implementing the same interface. This enables the user to specify additional combinations while ZRAM is running. Within the combinations many different algorithms can be used. Some of those algorithms are published. This patch adds the following algorithms to be used within the combinations: - bwt: The Burrows-Wheeler-Transformation was published by 'M. Burrows' and 'D. J. Wheeler' in 1994. This implementation uses counting sort for sorting the data. Their original paper is available online at: http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf - mtf: The Move-To-Front algorithm as described by 'M. Burrows' and 'D. J. Wheeler' in the same paper as bwt. - jbe: j-bit-encoding as proposed by 'I Made Agus Dwi Suarjaya' in 2012. https://arxiv.org/pdf/1209.1045.pdf - jbe2: A minor modification of jbe. Swapping groups of 4 Bit in consecutive Bytes can increase the compression ratio, if for example the first 4 Bits of each Byte are zero. If jbe2 is called after mtf, this happens often. - rle: Run Length Encoding - huffman: Huffman encoding - bewalgo: I invented this algorithm for my bachelor's thesis 'Page-Based compression in the Linux Kernel'. This algorithm is mainly inspired by lz4, focusing on increasing the speed even more, with the help of page aligned read and write access. To achieve the page alignment, the input and output data is accessed only in blocks of 8 Bytes, therefore the encoding of the compressed data is changed. 
https://wr.informatik.uni-hamburg.de/_media/research:theses:benjamin_warnke_page_based_compression_in_the_linux_kernel.pdf - bewalgo2: At the beginning of my work to improve ZRAM this was the whole algorithm. The input is read in blocks of 8 Bytes. These Blocks are added to an avl-tree. The avl-tree is mapped directly to an array. The encoding is a variation of Run Length Encoding using the indices in the avl-tree as data. The reason for using the tree with indices is that the indices can be encoded in less than 8 Bytes each. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- include/linux/zbewalgo.h | 50 lib/Kconfig | 3 + lib/Makefile | 1 + lib/zbewalgo/BWT.c| 120 lib/zbewalgo/BWT.h| 21 ++ lib/zbewalgo/JBE.c| 204 + lib/zbewalgo/JBE.h| 13 + lib/zbewalgo/JBE2.c | 221 ++ lib/zbewalgo/JBE2.h | 13 + lib/zbewalgo/MTF.c| 122 lib/zbewalgo/MTF.h| 13 + lib/zbewalgo/Makefile | 4 +
[PATCH v6 5/5] crypto: add flag for unstable encoding
The data-format of zBeWalgo, and some other algorithms is unstable. To identify such unstable algorithms this patch adds a new flag to the crypto-api. Signed-off-by: Benjamin Warnke <4bwar...@informatik.uni-hamburg.de> --- crypto/zbewalgo.c | 2 +- include/linux/crypto.h | 6 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/crypto/zbewalgo.c b/crypto/zbewalgo.c index 9db0d43be..e57b5ced5 100644 --- a/crypto/zbewalgo.c +++ b/crypto/zbewalgo.c @@ -134,7 +134,7 @@ static int zbewalgo_decompress_crypto_unsafe(struct crypto_tfm *tfm, static struct crypto_alg crypto_alg_zbewalgo = { .cra_name = "zbewalgo", - .cra_flags = CRYPTO_ALG_TYPE_COMPRESS, + .cra_flags = CRYPTO_ALG_TYPE_COMPRESS | CRYPTO_ALG_UNSTABLE_ENCODING, .cra_ctxsize = sizeof(struct zbewalgo_ctx), .cra_module = THIS_MODULE, .cra_init = zbewalgo_init, diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 63420dac0..372893569 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -112,6 +112,12 @@ */ #define CRYPTO_ALG_OPTIONAL_KEY0x4000 +/* + * Set if the algorithm is new and it is likely that the encoding may + * change in near future + */ +#define CRYPTO_ALG_UNSTABLE_ENCODING 0x8000 + /* * Transform masks and values (for crt_flags). */ -- 2.14.1
[PATCH 0/2] crypto: ccree: cleanup and hardware keys
Small cleanup and add support for CryptoCell hardware keys. Gilad Ben-Yossef (2): crypto: ccree: remove unused enums crypto: ccree: enable support for hardware keys crypto/testmgr.c| 43 drivers/crypto/ccree/cc_cipher.c| 348 drivers/crypto/ccree/cc_cipher.h| 30 +-- drivers/crypto/ccree/cc_hw_queue_defs.h | 28 +-- 4 files changed, 366 insertions(+), 83 deletions(-) -- 2.7.4
[PATCH 1/2] crypto: ccree: remove unused enums
Remove enums definitions unused in the driver code. Signed-off-by: Gilad Ben-Yossef--- drivers/crypto/ccree/cc_hw_queue_defs.h | 28 +++- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/drivers/crypto/ccree/cc_hw_queue_defs.h b/drivers/crypto/ccree/cc_hw_queue_defs.h index bf0d235..a091ae5 100644 --- a/drivers/crypto/ccree/cc_hw_queue_defs.h +++ b/drivers/crypto/ccree/cc_hw_queue_defs.h @@ -124,13 +124,6 @@ enum cc_flow_mode { FLOW_MODE_END = S32_MAX, }; -enum cc_tunnel_op { - TUNNEL_OP_INVALID = -1, - TUNNEL_OFF = 0, - TUNNEL_ON = 1, - TUNNEL_OP_END = S32_MAX, -}; - enum cc_setup_op { SETUP_LOAD_NOP = 0, SETUP_LOAD_STATE0 = 1, @@ -145,6 +138,13 @@ enum cc_setup_op { SETUP_OP_END = S32_MAX, }; +enum cc_hash_conf_pad { + HASH_PADDING_DISABLED = 0, + HASH_PADDING_ENABLED = 1, + HASH_DIGEST_RESULT_LITTLE_ENDIAN = 2, + HASH_CONFIG1_PADDING_RESERVE32 = S32_MAX, +}; + enum cc_aes_mac_selector { AES_SK = 1, AES_CMAC_INIT = 2, @@ -179,20 +179,6 @@ enum cc_hw_aes_key_size { END_OF_AES_KEYS = S32_MAX, }; -enum cc_hw_des_key_size { - DES_ONE_KEY = 0, - DES_TWO_KEYS = 1, - DES_THREE_KEYS = 2, - END_OF_DES_KEYS = S32_MAX, -}; - -enum cc_hash_conf_pad { - HASH_PADDING_DISABLED = 0, - HASH_PADDING_ENABLED = 1, - HASH_DIGEST_RESULT_LITTLE_ENDIAN = 2, - HASH_CONFIG1_PADDING_RESERVE32 = S32_MAX, -}; - enum cc_hash_cipher_pad { DO_NOT_PAD = 0, DO_PAD = 1, -- 2.7.4
[PATCH 2/2] crypto: ccree: enable support for hardware keys
Enable CryptoCell support for hardware keys. Hardware keys are regular AES keys loaded into CryptoCell internal memory via firmware, often from secure boot ROM or hardware fuses at boot time. As such, they can be used for enc/dec purposes like any other key but cannot (read: extremely hard to) be extracted since since they are not available anywhere in RAM during runtime. The mechanism has some similarities to s390 secure keys although the keys are not wrapped or sealed, but simply loaded offline. The interface was therefore modeled based on the s390 secure keys support. Signed-off-by: Gilad Ben-Yossef--- crypto/testmgr.c | 43 + drivers/crypto/ccree/cc_cipher.c | 348 ++- drivers/crypto/ccree/cc_cipher.h | 30 +--- 3 files changed, 359 insertions(+), 62 deletions(-) diff --git a/crypto/testmgr.c b/crypto/testmgr.c index af4a01c..8a5a60c 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -2558,6 +2558,13 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + /* Same as cbc(aes) except the key is stored in +* hardware secure memory which we reference by index +*/ + .alg = "cbc(haes)", + .test = alg_test_null, + .fips_allowed = 1, + }, { .alg = "cbc(serpent)", .test = alg_test_skcipher, .suite = { @@ -2704,6 +2711,13 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + /* Same as ctr(aes) except the key is stored in +* hardware secure memory which we reference by index +*/ + .alg = "ctr(haes)", + .test = alg_test_null, + .fips_allowed = 1, + }, { .alg = "ctr(serpent)", .test = alg_test_skcipher, .suite = { @@ -2974,6 +2988,13 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + /* Same as ecb(aes) except the key is stored in +* hardware secure memory which we reference by index +*/ + .alg = "ecb(haes)", + .test = alg_test_null, + .fips_allowed = 1, + }, { .alg = "ecb(khazad)", .test = alg_test_skcipher, .suite = { @@ -3301,6 +3322,13 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + /* Same as 
ofb(aes) except the key is stored in +* hardware secure memory which we reference by index +*/ + .alg = "ofb(haes)", + .test = alg_test_null, + .fips_allowed = 1, + }, { .alg = "pcbc(fcrypt)", .test = alg_test_skcipher, .suite = { @@ -3558,6 +3586,21 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + /* Same as xts(aes) except the key is stored in +* hardware secure memory which we reference by index +*/ + .alg = "xts(haes)", + .test = alg_test_null, + .fips_allowed = 1, + }, { + .alg = "xts4096(haes)", + .test = alg_test_null, + .fips_allowed = 1, + }, { + .alg = "xts512(haes)", + .test = alg_test_null, + .fips_allowed = 1, + }, { .alg = "xts(camellia)", .test = alg_test_skcipher, .suite = { diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c index df98f7a..8ccb7c4 100644 --- a/drivers/crypto/ccree/cc_cipher.c +++ b/drivers/crypto/ccree/cc_cipher.c @@ -42,6 +42,7 @@ struct cc_cipher_ctx { int cipher_mode; int flow_mode; unsigned int flags; + bool hw_key; struct cc_user_key_info user; struct cc_hw_key_info hw; struct crypto_shash *shash_tfm; @@ -49,6 +50,13 @@ struct cc_cipher_ctx { static void cc_cipher_complete(struct device *dev, void *cc_req, int err); +static inline bool cc_is_hw_key(struct crypto_tfm *tfm) +{ + struct cc_cipher_ctx *ctx_p = crypto_tfm_ctx(tfm); + + return ctx_p->hw_key; +} + static int validate_keys_sizes(struct cc_cipher_ctx *ctx_p, u32 size) { switch (ctx_p->flow_mode) { @@ -211,7 +219,7 @@ struct tdes_keys { u8 key3[DES_KEY_SIZE]; }; -static enum cc_hw_crypto_key hw_key_to_cc_hw_key(int slot_num) +static enum cc_hw_crypto_key cc_slot_to_hw_key(int slot_num) { switch (slot_num) { case 0: @@ -226,69 +234,98 @@ static enum cc_hw_crypto_key