[PATCH v2 2/2] ext4: Optimize match for casefolded encrypted dirs

2021-03-19 Thread Daniel Rosenberg
Matching names in casefolded encrypted directories requires
decrypting entries to confirm case, since we are case preserving. We can
avoid the decryption when the hash values don't match.

Signed-off-by: Daniel Rosenberg 
---
 fs/ext4/ext4.h  | 17 ---
 fs/ext4/namei.c | 55 ++---
 2 files changed, 38 insertions(+), 34 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index dafa528c4d9f..181d07791efb 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2637,9 +2637,9 @@ extern unsigned ext4_free_clusters_after_init(struct 
super_block *sb,
 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
 #ifdef CONFIG_UNICODE
-extern void ext4_fname_setup_ci_filename(struct inode *dir,
+extern int ext4_fname_setup_ci_filename(struct inode *dir,
 const struct qstr *iname,
-struct fscrypt_str *fname);
+struct ext4_filename *fname);
 #endif
 
 #ifdef CONFIG_FS_ENCRYPTION
@@ -2670,9 +2670,9 @@ static inline int ext4_fname_setup_filename(struct inode 
*dir,
ext4_fname_from_fscrypt_name(fname, &name);
 
 #ifdef CONFIG_UNICODE
-   ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+   err = ext4_fname_setup_ci_filename(dir, iname, fname);
 #endif
-   return 0;
+   return err;
 }
 
 static inline int ext4_fname_prepare_lookup(struct inode *dir,
@@ -2689,9 +2689,9 @@ static inline int ext4_fname_prepare_lookup(struct inode 
*dir,
ext4_fname_from_fscrypt_name(fname, &name);
 
 #ifdef CONFIG_UNICODE
-   ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name);
+   err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname);
 #endif
-   return 0;
+   return err;
 }
 
 static inline void ext4_fname_free_filename(struct ext4_filename *fname)
@@ -2716,15 +2716,16 @@ static inline int ext4_fname_setup_filename(struct 
inode *dir,
int lookup,
struct ext4_filename *fname)
 {
+   int err = 0;
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len;
 
 #ifdef CONFIG_UNICODE
-   ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+   err = ext4_fname_setup_ci_filename(dir, iname, fname);
 #endif
 
-   return 0;
+   return err;
 }
 
 static inline int ext4_fname_prepare_lookup(struct inode *dir,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 97d2755b9775..1fb7128220ce 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -816,7 +816,9 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
if (hinfo->hash_version <= DX_HASH_TEA)
hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
-   if (fname && fname_name(fname))
+   /* hash is already computed for encrypted casefolded directory */
+   if (fname && fname_name(fname) &&
+   !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)))
ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
hash = hinfo->hash;
 
@@ -1367,19 +1369,21 @@ static int ext4_ci_compare(const struct inode *parent, 
const struct qstr *name,
return ret;
 }
 
-void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
- struct fscrypt_str *cf_name)
+int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
+ struct ext4_filename *name)
 {
+   struct fscrypt_str *cf_name = &name->cf_name;
+   struct dx_hash_info *hinfo = &name->hinfo;
int len;
 
if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding) {
cf_name->name = NULL;
-   return;
+   return 0;
}
 
cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
if (!cf_name->name)
-   return;
+   return -ENOMEM;
 
len = utf8_casefold(dir->i_sb->s_encoding,
iname, cf_name->name,
@@ -1387,10 +1391,18 @@ void ext4_fname_setup_ci_filename(struct inode *dir, 
const struct qstr *iname,
if (len <= 0) {
kfree(cf_name->name);
cf_name->name = NULL;
-   return;
}
cf_name->len = (unsigned) len;
+   if (!IS_ENCRYPTED(dir))
+   return 0;
 
+   hinfo->hash_version = DX_HASH_SIPHASH;
+   hinfo->seed = NULL;
+   if (cf_name->name)
+   ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
+   else
+   ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
+   return 0;
 }
 #endif
 
@@ -1420,16 +1432,12 @@ static bool ext4_m

[PATCH v2 1/2] ext4: Handle casefolding with encryption

2021-03-19 Thread Daniel Rosenberg
This adds support for encryption with casefolding.

Since the name on disk is case preserving, and also encrypted, we can no
longer just recompute the hash on the fly. Additionally, to avoid
leaking extra information from the hash of the unencrypted name, we use
siphash via an fscrypt v2 policy.

The hash is stored at the end of the directory entry for all entries
inside of an encrypted and casefolded directory apart from those that
deal with '.' and '..'. This way, the change is backwards compatible
with existing ext4 filesystems.

Signed-off-by: Daniel Rosenberg 
---
 Documentation/filesystems/ext4/directory.rst |  27 +++
 fs/ext4/dir.c|  37 +++-
 fs/ext4/ext4.h   |  56 +-
 fs/ext4/hash.c   |  25 ++-
 fs/ext4/inline.c |  25 ++-
 fs/ext4/namei.c  | 198 ++-
 fs/ext4/super.c  |   6 -
 7 files changed, 285 insertions(+), 89 deletions(-)

diff --git a/Documentation/filesystems/ext4/directory.rst 
b/Documentation/filesystems/ext4/directory.rst
index 073940cc64ed..55f618b37144 100644
--- a/Documentation/filesystems/ext4/directory.rst
+++ b/Documentation/filesystems/ext4/directory.rst
@@ -121,6 +121,31 @@ The directory file type is one of the following values:
* - 0x7
  - Symbolic link.
 
+To support directories that are both encrypted and casefolded, we
+must also include hash information in the directory entry. We append
+``ext4_extended_dir_entry_2`` to ``ext4_dir_entry_2`` except for the entries
+for dot and dotdot, which are kept the same. The structure follows immediately
+after ``name`` and is included in the size listed by ``rec_len``. If a directory
+entry uses this extension, it may be up to 271 bytes.
+
+.. list-table::
+   :widths: 8 8 24 40
+   :header-rows: 1
+
+   * - Offset
+ - Size
+ - Name
+ - Description
+   * - 0x0
+ - \_\_le32
+ - hash
+ - The hash of the directory name
+   * - 0x4
+ - \_\_le32
+ - minor\_hash
+ - The minor hash of the directory name
+
+
 In order to add checksums to these classic directory blocks, a phony
 ``struct ext4_dir_entry`` is placed at the end of each leaf block to
 hold the checksum. The directory entry is 12 bytes long. The inode
@@ -322,6 +347,8 @@ The directory hash is one of the following values:
  - Half MD4, unsigned.
* - 0x5
  - Tea, unsigned.
+   * - 0x6
+ - Siphash.
 
 Interior nodes of an htree are recorded as ``struct dx_node``, which is
 also the full length of a data block:
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 5ed870614c8d..21a98288de49 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -55,6 +55,18 @@ static int is_dx_dir(struct inode *inode)
return 0;
 }
 
+static bool is_fake_dir_entry(struct ext4_dir_entry_2 *de)
+{
+   /* Check if . or .. , or skip if namelen is 0 */
+   if ((de->name_len > 0) && (de->name_len <= 2) && (de->name[0] == '.') &&
+   (de->name[1] == '.' || de->name[1] == '\0'))
+   return true;
+   /* Check if this is a csum entry */
+   if (de->file_type == EXT4_FT_DIR_CSUM)
+   return true;
+   return false;
+}
+
 /*
  * Return 0 if the directory entry is OK, and 1 if there is a problem
  *
@@ -73,16 +85,20 @@ int __ext4_check_dir_entry(const char *function, unsigned 
int line,
const int rlen = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
const int next_offset = ((char *) de - buf) + rlen;
+   bool fake = is_fake_dir_entry(de);
+   bool has_csum = ext4_has_metadata_csum(dir->i_sb);
 
-   if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
+   if (unlikely(rlen < ext4_dir_rec_len(1, fake ? NULL : dir)))
error_msg = "rec_len is smaller than minimal";
else if (unlikely(rlen % 4 != 0))
error_msg = "rec_len % 4 != 0";
-   else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
+   else if (unlikely(rlen < ext4_dir_rec_len(de->name_len,
+   fake ? NULL : dir)))
error_msg = "rec_len is too small for name_len";
else if (unlikely(next_offset > size))
error_msg = "directory entry overrun";
-   else if (unlikely(next_offset > size - EXT4_DIR_REC_LEN(1) &&
+   else if (unlikely(next_offset > size - ext4_dir_rec_len(1,
+ has_csum ? NULL : dir) &&
  next_offset != size))
error_msg = "directory entry too close to block end";
else if (unlikely(le32_to_cpu(de->inode) >
@@ -94,15 +110,15 @@ int __ext4_check_dir_entry(const char *

[PATCH v2 0/2] Reconcile Encryption and Casefolding in Ext4

2021-03-19 Thread Daniel Rosenberg
These patches add support for ext4 encryption and casefolding at the same time.
Since the hash for encrypted casefolded directory names cannot be computed
without the key, we need to store the hash on disk. We only do so for encrypted
and casefolded directories to avoid on disk format changes.

e2fsprogs has already been updated with support for casefolding and encryption.

v2 changes:
When checking for 'fake' entries (which do not include the extra hash bytes):
-Check for . and .. using names instead of position
-Check for csum entries via file_type instead of position
-Assume the last entry in the directory will be the csum entry for
 __ext4_check_dir_entry if csum is enabled

This means we don't need to pass along lblk all over the place

-Don't use siphash value for find_group_orlov, just use regular hash

Daniel Rosenberg (2):
  ext4: Handle casefolding with encryption
  ext4: Optimize match for casefolded encrypted dirs

 Documentation/filesystems/ext4/directory.rst |  27 +++
 fs/ext4/dir.c|  37 +++-
 fs/ext4/ext4.h   |  73 +--
 fs/ext4/hash.c   |  25 ++-
 fs/ext4/inline.c |  25 ++-
 fs/ext4/namei.c  | 213 ++-
 fs/ext4/super.c  |   6 -
 7 files changed, 303 insertions(+), 103 deletions(-)


base-commit: f296bfd5cd04cbb49b8fc9585adc280ab2b58624
-- 
2.31.0.rc2.261.g7f71774620-goog



Re: [PATCH 1/2] ext4: Handle casefolding with encryption

2021-02-18 Thread Daniel Rosenberg
On Wed, Feb 17, 2021 at 2:48 PM Andreas Dilger  wrote:
>
> On Feb 17, 2021, at 9:08 AM, Theodore Ts'o  wrote:
> >
> > On Tue, Feb 16, 2021 at 08:01:11PM -0800, Daniel Rosenberg wrote:
> >> I'm not sure what the conflict is, at least format-wise. Naturally,
> >> there would need to be some work to reconcile the two patches, but my
> >> patch only alters the format for directories which are encrypted and
> >> casefolded, which always must have the additional hash field. In the
> >> case of dirdata along with encryption and casefolding, couldn't we
> >> have the dirdata simply follow after the existing data? Since we
> >> always already know the length, it'd be unambiguous where that would
> >> start. Casefolding can only be altered on an empty directory, and you
> >> can only enable encryption for an empty directory, so I'm not too
> >> concerned there. I feel like having it swapping between the different
> >> methods makes it more prone to bugs, although it would be doable. I've
> >> started rebasing the dirdata patch on my end to see how easy it is to
> >> mix the two. At a glance, they touch a lot of the same areas in
> >> similar ways, so it shouldn't be too hard. It's more of a question of
> >> which way we want to resolve that, and which patch goes first.
> >>
> >> I've been trying to figure out how many devices in the field are using
> >> casefolded encryption, but haven't found out yet. The code is
> >> definitely available though, so I would not be surprised if it's being
> >> used, or is about to be.
> >
> > The problem is in how the space after the filename in a directory is
> > encoded.  The dirdata format is (mildly) expandable, supporting up to
> > 4 different metadata chunks after the filename, using a very
> > compactly encoded TLV (or moral equivalent) scheme.  For directory
> > inodes that have both the encryption and compression flags set, we have
> > a single blob which gets used as the IV for the crypto.
> >
> > So it's the difference between a simple blob that is only used for one
> > thing in this particular case, and something which is the moral
> > equivalent of simple ASN.1 or protobuf encoding.
> >
> > Currently, dirdata has defined uses for 2 of the 4 "chunks", which are
> > used in Lustre servers.  The proposal which Andreas has suggested is
> > that if the dirdata feature is supported, then the 3rd dirdata chunk
> > would be used for the data currently stored by the
> > encrypted-casefolded extension, and the 4th would get reserved for a
> > to-be-defined extension mechanism.
> >
> > If ext4 encrypted/casefold is not yet in use, and we can get the
> > changes out to all potential users before they release products out
> > into the field, then one approach would be to only support
> > encrypted/casefold when dirdata is also enabled.
> >
> > If ext4 encrypted/casefold is in use, my suggestion is that we support
> > both encrypted/casefold && !dirdata as you have currently implemented
> > it, and encrypted/casefold && dirdata as Andreas has proposed.
> >
> > IIRC, supporting Andreas's scheme essentially means that we use
> > the top four bits in the rec_len field to indicate which chunks are
> > present, and then for each chunk which is present, there is a 1 byte
> > length followed by payload.  So that means in the case where it's
> > encrypted/casefold && dirdata, the required storage of the directory
> > entry would take one additional byte, plus setting a bit indicating
> > that the encrypted/casefold dirdata chunk was present.
>
> I think your email already covers pretty much all of the points.
>
> One small difference between current "raw" encrypted/casefold hash vs.
> dirdata is that the former is 4-byte aligned within the dirent, while
> dirdata is packed.  So in 3/4 cases dirdata would take the same amount
> of space (the 1-byte length would use one of the 1-3 bytes of padding
> vs. the raw format), since the next dirent needs to be aligned anyway.
>
> The other implication here is that the 8-byte hash may need to be
> copied out of the dirent into a local variable before use, due to
> alignment issues, but I'm not sure if that is actually needed or not.
>
> > So, no, they aren't incompatible ultimately, but it might require a
> > tiny bit more work to integrate the combined support for dirdata plus
> > encrypted/casefold.  One way we can do this, if we have to support the
> > current encrypted/casefold format because it's out there in deployed
>

Re: [PATCH 1/2] ext4: Handle casefolding with encryption

2021-02-16 Thread Daniel Rosenberg
I'm not sure what the conflict is, at least format-wise. Naturally,
there would need to be some work to reconcile the two patches, but my
patch only alters the format for directories which are encrypted and
casefolded, which always must have the additional hash field. In the
case of dirdata along with encryption and casefolding, couldn't we
have the dirdata simply follow after the existing data? Since we
always already know the length, it'd be unambiguous where that would
start. Casefolding can only be altered on an empty directory, and you
can only enable encryption for an empty directory, so I'm not too
concerned there. I feel like having it swapping between the different
methods makes it more prone to bugs, although it would be doable. I've
started rebasing the dirdata patch on my end to see how easy it is to
mix the two. At a glance, they touch a lot of the same areas in
similar ways, so it shouldn't be too hard. It's more of a question of
which way we want to resolve that, and which patch goes first.

I've been trying to figure out how many devices in the field are using
casefolded encryption, but haven't found out yet. The code is
definitely available though, so I would not be surprised if it's being
used, or is about to be.

-Daniel
On Tue, Feb 9, 2021 at 8:03 PM Theodore Ts'o  wrote:
>
> On Tue, Feb 09, 2021 at 08:03:10PM -0700, Andreas Dilger wrote:
> > Depending on the size of the "escape", it probably makes sense to move
> > toward having e2fsck migrate from the current mechanism to using dirdata
> > for all deployments.  In the current implementation, tools don't really
> > know for sure if there is data beyond the filename in the dirent or not.
>
> It's actually quite well defined.  If dirdata is enabled, then we
> follow the dirdata rules.  If dirdata is *not* enabled, then if a
> directory inode has the case folding and encryption flags set, then
> there will be cryptographic data immediately following the filename.
> Otherwise, there is no valid data after the filename.
>
> > For example, what if casefold is enabled on an existing filesystem that
> > already has an encrypted directory?  Does the code _assume_ that there is
> > a hash beyond the name if the rec_len is long enough for this?
>
> No, we will only expect there to be a hash beyond the name if
> EXT4_CASEFOLD_FL and EXT4_ENCRYPT_FL flags are set on the inode.  (And
> if the rec_len is not large enough, then that's a corrupted directory
> entry.)
>
> > I guess it is implicit with the casefold+encryption case for dirents in
> > directories that have the encryption flag set in a filesystem that also
> > has casefold enabled, but it's definitely not friendly to these features
> > being enabled on an existing filesystem.
>
> No, it's fine.  That's because the EXT4_CASEFOLD_FL inode flag can
> only be set if the EXT4_FEATURE_INCOMPAT_CASEFOLD is set in the
> superblock, and EXT4_ENCRYPT_FL inode flag can only be set if
> EXT4_FEATURE_INCOMPAT_ENCRYPT is set in the superblock.  This is why it
> will be safe to enable either of these features, since merely enabling the
> file system features only allows new directories to be created with
> both CASEFOLD_FL and ENCRYPT_FL set.
>
> The only restriction we would have is that if a file system has both the
> case folding and encryption features, it will *not* be safe to set the
> dirdata feature flag without first scanning all of the directories to
> see if there are any directories that have both the casefold and
> encrypt flags set on that inode, and if so, to convert all of the
> directory entries to use dirdata.  I don't think this is going to be a
> significant restriction in practice, though.
>
> - Ted
>
>
> --
> To unsubscribe from this group and stop receiving emails from it, send an 
> email to kernel-team+unsubscr...@android.com.
>


[PATCH 1/2] ext4: Handle casefolding with encryption

2021-02-03 Thread Daniel Rosenberg
This adds support for encryption with casefolding.

Since the name on disk is case preserving, and also encrypted, we can no
longer just recompute the hash on the fly. Additionally, to avoid
leaking extra information from the hash of the unencrypted name, we use
siphash via an fscrypt v2 policy.

The hash is stored at the end of the directory entry for all entries
inside of an encrypted and casefolded directory apart from those that
deal with '.' and '..'. This way, the change is backwards compatible
with existing ext4 filesystems.

Signed-off-by: Daniel Rosenberg 
Signed-off-by: Paul Lawrence 
---
 Documentation/filesystems/ext4/directory.rst |  27 ++
 fs/ext4/dir.c|  46 ++-
 fs/ext4/ext4.h   |  62 +++-
 fs/ext4/hash.c   |  25 +-
 fs/ext4/ialloc.c |   5 +-
 fs/ext4/inline.c |  41 +--
 fs/ext4/namei.c  | 308 +--
 fs/ext4/super.c  |   6 -
 8 files changed, 373 insertions(+), 147 deletions(-)

diff --git a/Documentation/filesystems/ext4/directory.rst 
b/Documentation/filesystems/ext4/directory.rst
index 073940cc64ed..55f618b37144 100644
--- a/Documentation/filesystems/ext4/directory.rst
+++ b/Documentation/filesystems/ext4/directory.rst
@@ -121,6 +121,31 @@ The directory file type is one of the following values:
* - 0x7
  - Symbolic link.
 
+To support directories that are both encrypted and casefolded, we
+must also include hash information in the directory entry. We append
+``ext4_extended_dir_entry_2`` to ``ext4_dir_entry_2`` except for the entries
+for dot and dotdot, which are kept the same. The structure follows immediately
+after ``name`` and is included in the size listed by ``rec_len``. If a directory
+entry uses this extension, it may be up to 271 bytes.
+
+.. list-table::
+   :widths: 8 8 24 40
+   :header-rows: 1
+
+   * - Offset
+ - Size
+ - Name
+ - Description
+   * - 0x0
+ - \_\_le32
+ - hash
+ - The hash of the directory name
+   * - 0x4
+ - \_\_le32
+ - minor\_hash
+ - The minor hash of the directory name
+
+
 In order to add checksums to these classic directory blocks, a phony
 ``struct ext4_dir_entry`` is placed at the end of each leaf block to
 hold the checksum. The directory entry is 12 bytes long. The inode
@@ -322,6 +347,8 @@ The directory hash is one of the following values:
  - Half MD4, unsigned.
* - 0x5
  - Tea, unsigned.
+   * - 0x6
+ - Siphash.
 
 Interior nodes of an htree are recorded as ``struct dx_node``, which is
 also the full length of a data block:
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ca50c90adc4c..9da6db183d4f 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -30,6 +30,8 @@
 #include "ext4.h"
 #include "xattr.h"
 
+#define DOTDOT_OFFSET 12
+
 static int ext4_dx_readdir(struct file *, struct dir_context *);
 
 /**
@@ -55,6 +57,19 @@ static int is_dx_dir(struct inode *inode)
return 0;
 }
 
+static bool is_fake_entry(struct inode *dir, ext4_lblk_t lblk,
+ unsigned int offset, unsigned int blocksize)
+{
+   /* Entries in the first block before this value refer to . or .. */
+   if (lblk == 0 && offset <= DOTDOT_OFFSET)
+   return true;
+   /* Check if this is likely the csum entry */
+   if (ext4_has_metadata_csum(dir->i_sb) && offset % blocksize ==
+   blocksize - sizeof(struct ext4_dir_entry_tail))
+   return true;
+   return false;
+}
+
 /*
  * Return 0 if the directory entry is OK, and 1 if there is a problem
  *
@@ -67,22 +82,28 @@ int __ext4_check_dir_entry(const char *function, unsigned 
int line,
   struct inode *dir, struct file *filp,
   struct ext4_dir_entry_2 *de,
   struct buffer_head *bh, char *buf, int size,
+  ext4_lblk_t lblk,
   unsigned int offset)
 {
const char *error_msg = NULL;
const int rlen = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
const int next_offset = ((char *) de - buf) + rlen;
+   unsigned int blocksize = dir->i_sb->s_blocksize;
+   bool fake = is_fake_entry(dir, lblk, offset, blocksize);
+   bool next_fake = is_fake_entry(dir, lblk, next_offset, blocksize);
 
-   if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
+   if (unlikely(rlen < ext4_dir_rec_len(1, fake ? NULL : dir)))
error_msg = "rec_len is smaller than minimal";
else if (unlikely(rlen % 4 != 0))
error_msg = "rec_len % 4 != 0";
-   else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
+   else if (unlikely(rlen < ext4_dir_rec

[PATCH 2/2] ext4: Optimize match for casefolded encrypted dirs

2021-02-03 Thread Daniel Rosenberg
Matching names in casefolded encrypted directories requires
decrypting entries to confirm case, since we are case preserving. We can
avoid the decryption when the hash values don't match.

Signed-off-by: Daniel Rosenberg 
Signed-off-by: Paul Lawrence 
---
 fs/ext4/ext4.h  | 17 ---
 fs/ext4/namei.c | 55 ++---
 2 files changed, 38 insertions(+), 34 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 90a2c182e4d7..997f80cfe5df 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2637,9 +2637,9 @@ extern unsigned ext4_free_clusters_after_init(struct 
super_block *sb,
 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
 #ifdef CONFIG_UNICODE
-extern void ext4_fname_setup_ci_filename(struct inode *dir,
+extern int ext4_fname_setup_ci_filename(struct inode *dir,
 const struct qstr *iname,
-struct fscrypt_str *fname);
+struct ext4_filename *fname);
 #endif
 
 #ifdef CONFIG_FS_ENCRYPTION
@@ -2670,9 +2670,9 @@ static inline int ext4_fname_setup_filename(struct inode 
*dir,
ext4_fname_from_fscrypt_name(fname, &name);
 
 #ifdef CONFIG_UNICODE
-   ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+   err = ext4_fname_setup_ci_filename(dir, iname, fname);
 #endif
-   return 0;
+   return err;
 }
 
 static inline int ext4_fname_prepare_lookup(struct inode *dir,
@@ -2689,9 +2689,9 @@ static inline int ext4_fname_prepare_lookup(struct inode 
*dir,
ext4_fname_from_fscrypt_name(fname, &name);
 
 #ifdef CONFIG_UNICODE
-   ext4_fname_setup_ci_filename(dir, &dentry->d_name, &fname->cf_name);
+   err = ext4_fname_setup_ci_filename(dir, &dentry->d_name, fname);
 #endif
-   return 0;
+   return err;
 }
 
 static inline void ext4_fname_free_filename(struct ext4_filename *fname)
@@ -2716,15 +2716,16 @@ static inline int ext4_fname_setup_filename(struct 
inode *dir,
int lookup,
struct ext4_filename *fname)
 {
+   int err = 0;
fname->usr_fname = iname;
fname->disk_name.name = (unsigned char *) iname->name;
fname->disk_name.len = iname->len;
 
 #ifdef CONFIG_UNICODE
-   ext4_fname_setup_ci_filename(dir, iname, &fname->cf_name);
+   err = ext4_fname_setup_ci_filename(dir, iname, fname);
 #endif
 
-   return 0;
+   return err;
 }
 
 static inline int ext4_fname_prepare_lookup(struct inode *dir,
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 00b0b0cb4600..ff024bb613c0 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -799,7 +799,9 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
if (hinfo->hash_version <= DX_HASH_TEA)
hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
-   if (fname && fname_name(fname))
+   /* hash is already computed for encrypted casefolded directory */
+   if (fname && fname_name(fname) &&
+   !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)))
ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
hash = hinfo->hash;
 
@@ -1364,19 +1366,21 @@ static int ext4_ci_compare(const struct inode *parent, 
const struct qstr *name,
return ret;
 }
 
-void ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
- struct fscrypt_str *cf_name)
+int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
+ struct ext4_filename *name)
 {
+   struct fscrypt_str *cf_name = &name->cf_name;
+   struct dx_hash_info *hinfo = &name->hinfo;
int len;
 
if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding) {
cf_name->name = NULL;
-   return;
+   return 0;
}
 
cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
if (!cf_name->name)
-   return;
+   return -ENOMEM;
 
len = utf8_casefold(dir->i_sb->s_encoding,
iname, cf_name->name,
@@ -1384,10 +1388,18 @@ void ext4_fname_setup_ci_filename(struct inode *dir, 
const struct qstr *iname,
if (len <= 0) {
kfree(cf_name->name);
cf_name->name = NULL;
-   return;
}
cf_name->len = (unsigned) len;
+   if (!IS_ENCRYPTED(dir))
+   return 0;
 
+   hinfo->hash_version = DX_HASH_SIPHASH;
+   hinfo->seed = NULL;
+   if (cf_name->name)
+   ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
+   else
+   ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
+   return 0;

[PATCH 0/2] Reconcile Encryption and Casefolding in Ext4

2021-02-03 Thread Daniel Rosenberg
These patches add support for ext4 encryption and casefolding at the same time.
Since the hash for encrypted casefolded directory names cannot be computed
without the key, we need to store the hash on disk. We only do so for encrypted
and casefolded directories to avoid on disk format changes.

e2fsprogs has already been updated with support for casefolding and encryption.

Daniel Rosenberg (2):
  ext4: Handle casefolding with encryption
  ext4: Optimize match for casefolded encrypted dirs

 Documentation/filesystems/ext4/directory.rst |  27 ++
 fs/ext4/dir.c|  46 ++-
 fs/ext4/ext4.h   |  79 +++--
 fs/ext4/hash.c   |  25 +-
 fs/ext4/ialloc.c |   5 +-
 fs/ext4/inline.c |  41 +--
 fs/ext4/namei.c  | 323 +--
 fs/ext4/super.c  |   6 -
 8 files changed, 391 insertions(+), 161 deletions(-)


base-commit: e9f53353e166a67dfe4f8295100f8ac39d6cf10b
-- 
2.30.0.365.g02bc693789-goog



Re: [PATCH v4 2/3] fscrypt: Have filesystems handle their d_ops

2020-11-25 Thread Daniel Rosenberg
>
> This change has the side-effect of removing the capability of the root
> directory from being case-insensitive.  It is not a backward
> incompatible change because there is no way to make the root directory
> CI at the moment (it is never empty). But this restriction seems
> artificial. Is there a real reason to prevent the root inode from being
> case-insensitive?

> I don't have a use case where I need a root directory to be CI.  In
> fact, when I first implemented CI, I did want to block the root directory
> from being made CI, just to prevent people from doing "chattr +F /" and
> complaining afterwards when /usr/lib breaks.
>
> My concern with the current patch was whether this side-effect was
> considered, but I'm happy with either semantics.
>
> --
> Gabriel Krisman Bertazi

That's just from the lost+found directory right? If you remove it you
can still change it, and then add the lost+found directory back. Isn't
that how it works currently? I definitely didn't intend to change any
behavior around non-encrypted casefolding there.

I should look at what fsck does if you do that and have a LoSt+fOuNd folder...


-Daniel Rosenberg


[PATCH v4 0/3] Add support for Encryption and Casefolding in F2FS

2020-11-18 Thread Daniel Rosenberg
These patches are on top of the torvalds tree.

F2FS currently supports casefolding and encryption, but not at
the same time. These patches aim to rectify that. In a later follow up,
this will be added for Ext4 as well.

The f2fs-tools changes have already been applied.

Since both fscrypt and casefolding require their own dentry operations,
I've moved the responsibility of setting the dentry operations from fscrypt
to the filesystems and provided helper functions that should work for most
cases.

These are a follow-up to the previously sent patch set
"[PATCH v12 0/4] Prepare for upcoming Casefolding/Encryption patches"

v2:
Simplified generic dentry_op function
Passed through errors in f2fs_match_ci_name

v3:
Split some long lines
Cleaned up some code
Made some comments clearer
Fixed bug in v2 error passing

v4:
Added reviewed bys and acks from Eric
Removed unneeded variable
ifdef consistency

Daniel Rosenberg (3):
  libfs: Add generic function for setting dentry_ops
  fscrypt: Have filesystems handle their d_ops
  f2fs: Handle casefolding with Encryption

 fs/crypto/fname.c   |   4 --
 fs/crypto/fscrypt_private.h |   1 -
 fs/crypto/hooks.c   |   1 -
 fs/ext4/dir.c   |   7 ---
 fs/ext4/ext4.h  |   4 --
 fs/ext4/namei.c |   1 +
 fs/ext4/super.c |   5 --
 fs/f2fs/dir.c   | 105 ++--
 fs/f2fs/f2fs.h  |  11 ++--
 fs/f2fs/hash.c  |  11 +++-
 fs/f2fs/inline.c|   4 ++
 fs/f2fs/namei.c |   1 +
 fs/f2fs/recovery.c  |  12 -
 fs/f2fs/super.c |   7 ---
 fs/libfs.c  |  70 
 fs/ubifs/dir.c  |   1 +
 include/linux/fs.h  |   1 +
 include/linux/fscrypt.h |   7 ++-
 18 files changed, 185 insertions(+), 68 deletions(-)


base-commit: 0fa8ee0d9ab95c9350b8b84574824d9a384a9f7d
-- 
2.29.2.454.gaff20da3a2-goog



[PATCH v4 2/3] fscrypt: Have filesystems handle their d_ops

2020-11-18 Thread Daniel Rosenberg
This shifts the responsibility of setting up dentry operations from
fscrypt to the individual filesystems, allowing them to have their own
operations while still setting fscrypt's d_revalidate as appropriate.

Most filesystems can just use generic_set_encrypted_ci_d_ops, unless
they have their own specific dentry operations as well. That operation
will set the minimal d_ops required under the circumstances.

Since the fscrypt d_ops are set later on, we must set all d_ops there,
since we cannot adjust those later on. This should not result in any
change in behavior.

Signed-off-by: Daniel Rosenberg 
Acked-by: Eric Biggers 
---
 fs/crypto/fname.c   | 4 
 fs/crypto/fscrypt_private.h | 1 -
 fs/crypto/hooks.c   | 1 -
 fs/ext4/dir.c   | 7 ---
 fs/ext4/ext4.h  | 4 
 fs/ext4/namei.c | 1 +
 fs/ext4/super.c | 5 -
 fs/f2fs/dir.c   | 7 ---
 fs/f2fs/f2fs.h  | 3 ---
 fs/f2fs/namei.c | 1 +
 fs/f2fs/super.c | 1 -
 fs/ubifs/dir.c  | 1 +
 include/linux/fscrypt.h | 7 +--
 13 files changed, 8 insertions(+), 35 deletions(-)

diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 1fbe6c24d705..cb3cfa6329ba 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -570,7 +570,3 @@ int fscrypt_d_revalidate(struct dentry *dentry, unsigned 
int flags)
return valid;
 }
 EXPORT_SYMBOL_GPL(fscrypt_d_revalidate);
-
-const struct dentry_operations fscrypt_d_ops = {
-   .d_revalidate = fscrypt_d_revalidate,
-};
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index 4f5806a3b73d..df9c48c1fbf7 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -294,7 +294,6 @@ int fscrypt_fname_encrypt(const struct inode *inode, const 
struct qstr *iname,
 bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
  u32 orig_len, u32 max_len,
  u32 *encrypted_len_ret);
-extern const struct dentry_operations fscrypt_d_ops;
 
 /* hkdf.c */
 
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 20b0df47fe6a..9006fa983335 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -117,7 +117,6 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct 
dentry *dentry,
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_NOKEY_NAME;
spin_unlock(&dentry->d_lock);
-   d_set_d_op(dentry, &fscrypt_d_ops);
}
return err;
 }
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ca50c90adc4c..e757319a4472 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -667,10 +667,3 @@ const struct file_operations ext4_dir_operations = {
.open   = ext4_dir_open,
.release= ext4_release_dir,
 };
-
-#ifdef CONFIG_UNICODE
-const struct dentry_operations ext4_dentry_ops = {
-   .d_hash = generic_ci_d_hash,
-   .d_compare = generic_ci_d_compare,
-};
-#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf9429484462..ad77f01d9e20 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3380,10 +3380,6 @@ static inline void ext4_unlock_group(struct super_block 
*sb,
 /* dir.c */
 extern const struct file_operations ext4_dir_operations;
 
-#ifdef CONFIG_UNICODE
-extern const struct dentry_operations ext4_dentry_ops;
-#endif
-
 /* file.c */
 extern const struct inode_operations ext4_file_inode_operations;
 extern const struct file_operations ext4_file_operations;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 33509266f5a0..12a417ff5648 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1614,6 +1614,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode 
*dir,
struct buffer_head *bh;
 
err = ext4_fname_prepare_lookup(dir, dentry, &fname);
+   generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
return NULL;
if (err)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6633b20224d5..0288bedf46e1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4968,11 +4968,6 @@ static int ext4_fill_super(struct super_block *sb, void 
*data, int silent)
goto failed_mount4;
}
 
-#ifdef CONFIG_UNICODE
-   if (sb->s_encoding)
-   sb->s_d_op = &ext4_dentry_ops;
-#endif
-
sb->s_root = d_make_root(root);
if (!sb->s_root) {
ext4_msg(sb, KERN_ERR, "get root dentry failed");
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4b9ef8bbfa4a..71fdf5076461 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1099,10 +1099,3 @@ const struct file_operations f2fs_dir_operations = {
.compat_ioctl   = f2fs_compat_ioctl,
 #endif
 };
-
-#ifdef CONFIG_UNICODE
-const struct dentry_operations f2fs_dentry_ops = {
-   .d_hash = generic_ci_d_hash,
-   .d_compare = generic_ci_d_compare,
-};
-#endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cb700d797296..62b4f31d30e2 10064

[PATCH v4 3/3] f2fs: Handle casefolding with Encryption

2020-11-18 Thread Daniel Rosenberg
Expand f2fs's casefolding support to include encrypted directories.  To
index casefolded+encrypted directories, we use the SipHash of the
casefolded name, keyed by a key derived from the directory's fscrypt
master key.  This ensures that the dirhash doesn't leak information
about the plaintext filenames.

Encryption keys are unavailable during roll-forward recovery, so we
can't compute the dirhash when recovering a new dentry in an encrypted +
casefolded directory.  To avoid having to force a checkpoint when a new
file is fsync'ed, store the dirhash on-disk appended to i_name.

This patch incorporates work by Eric Biggers 
and Jaegeuk Kim .

Co-developed-by: Eric Biggers 
Signed-off-by: Eric Biggers 
Signed-off-by: Daniel Rosenberg 
Reviewed-by: Eric Biggers 
---
 fs/f2fs/dir.c  | 98 +++---
 fs/f2fs/f2fs.h |  8 ++--
 fs/f2fs/hash.c | 11 +-
 fs/f2fs/inline.c   |  4 ++
 fs/f2fs/recovery.c | 12 +-
 fs/f2fs/super.c|  6 ---
 6 files changed, 106 insertions(+), 33 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 71fdf5076461..82b58d1f80eb 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -5,6 +5,7 @@
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  * http://www.samsung.com/
  */
+#include <asm/unaligned.h>
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
 #include <linux/sched/signal.h>
@@ -206,30 +207,55 @@ static struct f2fs_dir_entry *find_in_block(struct inode 
*dir,
 /*
  * Test whether a case-insensitive directory entry matches the filename
  * being searched for.
+ *
+ * Returns 1 for a match, 0 for no match, and -errno on an error.
  */
-static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr 
*name,
+static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
   const u8 *de_name, u32 de_name_len)
 {
const struct super_block *sb = dir->i_sb;
const struct unicode_map *um = sb->s_encoding;
+   struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
struct qstr entry = QSTR_INIT(de_name, de_name_len);
int res;
 
+   if (IS_ENCRYPTED(dir)) {
+   const struct fscrypt_str encrypted_name =
+   FSTR_INIT((u8 *)de_name, de_name_len);
+
+   if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir)))
+   return -EINVAL;
+
+   decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+   if (!decrypted_name.name)
+   return -ENOMEM;
+   res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name,
+   &decrypted_name);
+   if (res < 0)
+   goto out;
+   entry.name = decrypted_name.name;
+   entry.len = decrypted_name.len;
+   }
+
res = utf8_strncasecmp_folded(um, name, &entry);
-   if (res < 0) {
-   /*
-* In strict mode, ignore invalid names.  In non-strict mode,
-* fall back to treating them as opaque byte sequences.
-*/
-   if (sb_has_strict_encoding(sb) || name->len != entry.len)
-   return false;
-   return !memcmp(name->name, entry.name, name->len);
+   /*
+* In strict mode, ignore invalid names.  In non-strict mode,
+* fall back to treating them as opaque byte sequences.
+*/
+   if (res < 0 && !sb_has_strict_encoding(sb)) {
+   res = name->len == entry.len &&
+   memcmp(name->name, entry.name, name->len) == 0;
+   } else {
+   /* utf8_strncasecmp_folded returns 0 on match */
+   res = (res == 0);
}
-   return res == 0;
+out:
+   kfree(decrypted_name.name);
+   return res;
 }
 #endif /* CONFIG_UNICODE */
 
-static inline bool f2fs_match_name(const struct inode *dir,
+static inline int f2fs_match_name(const struct inode *dir,
   const struct f2fs_filename *fname,
   const u8 *de_name, u32 de_name_len)
 {
@@ -256,6 +282,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct 
f2fs_dentry_ptr *d,
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
int max_len = 0;
+   int res = 0;
 
if (max_slots)
*max_slots = 0;
@@ -273,10 +300,15 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const 
struct f2fs_dentry_ptr *d,
continue;
}
 
-   if (de->hash_code == fname->hash &&
-   f2fs_match_name(d->inode, fname, d->filename[bit_pos],
-   le16_to_cpu(de->name_len)))
-   goto found;
+   if (de->hash_code == fname->hash) {
+   res = f2fs_match_name(d->inode, fname,
+ d

[PATCH v4 1/3] libfs: Add generic function for setting dentry_ops

2020-11-18 Thread Daniel Rosenberg
This adds a function to set dentry operations at lookup time that will
work for both encrypted filenames and casefolded filenames.

A filesystem that supports both features simultaneously can use this
function during lookup preparations to set up its dentry operations once
fscrypt no longer does that itself.

Currently the casefolding dentry operations are always set if the
filesystem defines an encoding because the feature is toggleable on
empty directories. Unlike in the encryption case, the dentry operations
used come from the parent. Since we don't know what set of functions
we'll eventually need, and cannot change them later, we enable the
casefolding operations if the filesystem supports them at all.

By splitting out the various cases, we support as few dentry operations
as we can get away with, maximizing compatibility with overlayfs, which
will not function if a filesystem supports certain dentry_operations.

Signed-off-by: Daniel Rosenberg 
Reviewed-by: Eric Biggers 
---
 fs/libfs.c | 70 ++
 include/linux/fs.h |  1 +
 2 files changed, 71 insertions(+)

diff --git a/fs/libfs.c b/fs/libfs.c
index fc34361c1489..bac918699022 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1449,4 +1449,74 @@ int generic_ci_d_hash(const struct dentry *dentry, 
struct qstr *str)
return 0;
 }
 EXPORT_SYMBOL(generic_ci_d_hash);
+
+static const struct dentry_operations generic_ci_dentry_ops = {
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
+};
+#endif
+
+#ifdef CONFIG_FS_ENCRYPTION
+static const struct dentry_operations generic_encrypted_dentry_ops = {
+   .d_revalidate = fscrypt_d_revalidate,
+};
+#endif
+
+#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
+static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
+   .d_revalidate = fscrypt_d_revalidate,
+};
+#endif
+
+/**
+ * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry
+ * @dentry:dentry to set ops on
+ *
+ * Casefolded directories need d_hash and d_compare set, so that the dentries
+ * contained in them are handled case-insensitively.  Note that these 
operations
+ * are needed on the parent directory rather than on the dentries in it, and
+ * while the casefolding flag can be toggled on and off on an empty directory,
+ * dentry_operations can't be changed later.  As a result, if the filesystem 
has
+ * casefolding support enabled at all, we have to give all dentries the
+ * casefolding operations even if their inode doesn't have the casefolding flag
+ * currently (and thus the casefolding ops would be no-ops for now).
+ *
+ * Encryption works differently in that the only dentry operation it needs is
+ * d_revalidate, which it only needs on dentries that have the no-key name 
flag.
+ * The no-key flag can't be set "later", so we don't have to worry about that.
+ *
+ * Finally, to maximize compatibility with overlayfs (which isn't compatible
+ * with certain dentry operations) and to avoid taking an unnecessary
+ * performance hit, we use custom dentry_operations for each possible
+ * combination rather than always installing all operations.
+ */
+void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
+{
+#ifdef CONFIG_FS_ENCRYPTION
+   bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME;
+#endif
+#ifdef CONFIG_UNICODE
+   bool needs_ci_ops = dentry->d_sb->s_encoding;
+#endif
+#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
+   if (needs_encrypt_ops && needs_ci_ops) {
+   d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
+   return;
+   }
 #endif
+#ifdef CONFIG_FS_ENCRYPTION
+   if (needs_encrypt_ops) {
+   d_set_d_op(dentry, &generic_encrypted_dentry_ops);
+   return;
+   }
+#endif
+#ifdef CONFIG_UNICODE
+   if (needs_ci_ops) {
+   d_set_d_op(dentry, &generic_ci_dentry_ops);
+   return;
+   }
+#endif
+}
+EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8667d0cdc71e..11345e66353b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3202,6 +3202,7 @@ extern int generic_ci_d_hash(const struct dentry *dentry, 
struct qstr *str);
 extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name);
 #endif
+extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry);
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-- 
2.29.2.454.gaff20da3a2-goog



[PATCH v3 3/3] f2fs: Handle casefolding with Encryption

2020-11-17 Thread Daniel Rosenberg
Expand f2fs's casefolding support to include encrypted directories.  To
index casefolded+encrypted directories, we use the SipHash of the
casefolded name, keyed by a key derived from the directory's fscrypt
master key.  This ensures that the dirhash doesn't leak information
about the plaintext filenames.

Encryption keys are unavailable during roll-forward recovery, so we
can't compute the dirhash when recovering a new dentry in an encrypted +
casefolded directory.  To avoid having to force a checkpoint when a new
file is fsync'ed, store the dirhash on-disk appended to i_name.

This patch incorporates work by Eric Biggers 
and Jaegeuk Kim .

Co-developed-by: Eric Biggers 
Signed-off-by: Eric Biggers 
Signed-off-by: Daniel Rosenberg 
---
 fs/f2fs/dir.c  | 98 +++---
 fs/f2fs/f2fs.h |  8 ++--
 fs/f2fs/hash.c | 11 +-
 fs/f2fs/inline.c   |  4 ++
 fs/f2fs/recovery.c | 12 +-
 fs/f2fs/super.c|  6 ---
 6 files changed, 106 insertions(+), 33 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 71fdf5076461..82b58d1f80eb 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -5,6 +5,7 @@
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  * http://www.samsung.com/
  */
+#include <asm/unaligned.h>
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
 #include <linux/sched/signal.h>
@@ -206,30 +207,55 @@ static struct f2fs_dir_entry *find_in_block(struct inode 
*dir,
 /*
  * Test whether a case-insensitive directory entry matches the filename
  * being searched for.
+ *
+ * Returns 1 for a match, 0 for no match, and -errno on an error.
  */
-static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr 
*name,
+static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
   const u8 *de_name, u32 de_name_len)
 {
const struct super_block *sb = dir->i_sb;
const struct unicode_map *um = sb->s_encoding;
+   struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
struct qstr entry = QSTR_INIT(de_name, de_name_len);
int res;
 
+   if (IS_ENCRYPTED(dir)) {
+   const struct fscrypt_str encrypted_name =
+   FSTR_INIT((u8 *)de_name, de_name_len);
+
+   if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir)))
+   return -EINVAL;
+
+   decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+   if (!decrypted_name.name)
+   return -ENOMEM;
+   res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name,
+   &decrypted_name);
+   if (res < 0)
+   goto out;
+   entry.name = decrypted_name.name;
+   entry.len = decrypted_name.len;
+   }
+
res = utf8_strncasecmp_folded(um, name, &entry);
-   if (res < 0) {
-   /*
-* In strict mode, ignore invalid names.  In non-strict mode,
-* fall back to treating them as opaque byte sequences.
-*/
-   if (sb_has_strict_encoding(sb) || name->len != entry.len)
-   return false;
-   return !memcmp(name->name, entry.name, name->len);
+   /*
+* In strict mode, ignore invalid names.  In non-strict mode,
+* fall back to treating them as opaque byte sequences.
+*/
+   if (res < 0 && !sb_has_strict_encoding(sb)) {
+   res = name->len == entry.len &&
+   memcmp(name->name, entry.name, name->len) == 0;
+   } else {
+   /* utf8_strncasecmp_folded returns 0 on match */
+   res = (res == 0);
}
-   return res == 0;
+out:
+   kfree(decrypted_name.name);
+   return res;
 }
 #endif /* CONFIG_UNICODE */
 
-static inline bool f2fs_match_name(const struct inode *dir,
+static inline int f2fs_match_name(const struct inode *dir,
   const struct f2fs_filename *fname,
   const u8 *de_name, u32 de_name_len)
 {
@@ -256,6 +282,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct 
f2fs_dentry_ptr *d,
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
int max_len = 0;
+   int res = 0;
 
if (max_slots)
*max_slots = 0;
@@ -273,10 +300,15 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const 
struct f2fs_dentry_ptr *d,
continue;
}
 
-   if (de->hash_code == fname->hash &&
-   f2fs_match_name(d->inode, fname, d->filename[bit_pos],
-   le16_to_cpu(de->name_len)))
-   goto found;
+   if (de->hash_code == fname->hash) {
+   res = f2fs_match_name(d->inode, fname,
+ d->filen

[PATCH v3 2/3] fscrypt: Have filesystems handle their d_ops

2020-11-17 Thread Daniel Rosenberg
This shifts the responsibility of setting up dentry operations from
fscrypt to the individual filesystems, allowing them to have their own
operations while still setting fscrypt's d_revalidate as appropriate.

Most filesystems can just use generic_set_encrypted_ci_d_ops, unless
they have their own specific dentry operations as well. That operation
will set the minimal d_ops required under the circumstances.

Since the fscrypt d_ops are set later on, we must set all d_ops there,
since we cannot adjust those later on. This should not result in any
change in behavior.

Signed-off-by: Daniel Rosenberg 
---
 fs/crypto/fname.c   | 4 
 fs/crypto/hooks.c   | 1 -
 fs/ext4/dir.c   | 7 ---
 fs/ext4/ext4.h  | 4 
 fs/ext4/namei.c | 1 +
 fs/ext4/super.c | 5 -
 fs/f2fs/dir.c   | 7 ---
 fs/f2fs/f2fs.h  | 3 ---
 fs/f2fs/namei.c | 1 +
 fs/f2fs/super.c | 1 -
 fs/ubifs/dir.c  | 1 +
 include/linux/fscrypt.h | 7 +--
 12 files changed, 8 insertions(+), 34 deletions(-)

diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 1fbe6c24d705..cb3cfa6329ba 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -570,7 +570,3 @@ int fscrypt_d_revalidate(struct dentry *dentry, unsigned 
int flags)
return valid;
 }
 EXPORT_SYMBOL_GPL(fscrypt_d_revalidate);
-
-const struct dentry_operations fscrypt_d_ops = {
-   .d_revalidate = fscrypt_d_revalidate,
-};
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 20b0df47fe6a..9006fa983335 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -117,7 +117,6 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct 
dentry *dentry,
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_NOKEY_NAME;
spin_unlock(&dentry->d_lock);
-   d_set_d_op(dentry, &fscrypt_d_ops);
}
return err;
 }
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ca50c90adc4c..e757319a4472 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -667,10 +667,3 @@ const struct file_operations ext4_dir_operations = {
.open   = ext4_dir_open,
.release= ext4_release_dir,
 };
-
-#ifdef CONFIG_UNICODE
-const struct dentry_operations ext4_dentry_ops = {
-   .d_hash = generic_ci_d_hash,
-   .d_compare = generic_ci_d_compare,
-};
-#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf9429484462..ad77f01d9e20 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3380,10 +3380,6 @@ static inline void ext4_unlock_group(struct super_block 
*sb,
 /* dir.c */
 extern const struct file_operations ext4_dir_operations;
 
-#ifdef CONFIG_UNICODE
-extern const struct dentry_operations ext4_dentry_ops;
-#endif
-
 /* file.c */
 extern const struct inode_operations ext4_file_inode_operations;
 extern const struct file_operations ext4_file_operations;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 33509266f5a0..12a417ff5648 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1614,6 +1614,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode 
*dir,
struct buffer_head *bh;
 
err = ext4_fname_prepare_lookup(dir, dentry, &fname);
+   generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
return NULL;
if (err)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6633b20224d5..0288bedf46e1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4968,11 +4968,6 @@ static int ext4_fill_super(struct super_block *sb, void 
*data, int silent)
goto failed_mount4;
}
 
-#ifdef CONFIG_UNICODE
-   if (sb->s_encoding)
-   sb->s_d_op = &ext4_dentry_ops;
-#endif
-
sb->s_root = d_make_root(root);
if (!sb->s_root) {
ext4_msg(sb, KERN_ERR, "get root dentry failed");
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4b9ef8bbfa4a..71fdf5076461 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1099,10 +1099,3 @@ const struct file_operations f2fs_dir_operations = {
.compat_ioctl   = f2fs_compat_ioctl,
 #endif
 };
-
-#ifdef CONFIG_UNICODE
-const struct dentry_operations f2fs_dentry_ops = {
-   .d_hash = generic_ci_d_hash,
-   .d_compare = generic_ci_d_compare,
-};
-#endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cb700d797296..62b4f31d30e2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3767,9 +3767,6 @@ static inline void f2fs_update_sit_info(struct 
f2fs_sb_info *sbi) {}
 #endif
 
 extern const struct file_operations f2fs_dir_operations;
-#ifdef CONFIG_UNICODE
-extern const struct dentry_operations f2fs_dentry_ops;
-#endif
 extern const struct file_operations f2fs_file_operations;
 extern const struct inode_operations f2fs_file_inode_operations;
 extern const struct address_space_operations f2fs_dblock_aops;
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 8fa37d1434de..6edb1ab579a1 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -497,6 +497,7 @@ static struct d

[PATCH v3 1/3] libfs: Add generic function for setting dentry_ops

2020-11-17 Thread Daniel Rosenberg
This adds a function to set dentry operations at lookup time that will
work for both encrypted filenames and casefolded filenames.

A filesystem that supports both features simultaneously can use this
function during lookup preparations to set up its dentry operations once
fscrypt no longer does that itself.

Currently the casefolding dentry operations are always set if the
filesystem defines an encoding because the feature is toggleable on
empty directories. Unlike in the encryption case, the dentry operations
used come from the parent. Since we don't know what set of functions
we'll eventually need, and cannot change them later, we enable the
casefolding operations if the filesystem supports them at all.

By splitting out the various cases, we support as few dentry operations
as we can get away with, maximizing compatibility with overlayfs, which
will not function if a filesystem supports certain dentry_operations.

Signed-off-by: Daniel Rosenberg 
---
 fs/libfs.c | 70 ++
 include/linux/fs.h |  1 +
 2 files changed, 71 insertions(+)

diff --git a/fs/libfs.c b/fs/libfs.c
index fc34361c1489..babef1f7b50e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1449,4 +1449,74 @@ int generic_ci_d_hash(const struct dentry *dentry, 
struct qstr *str)
return 0;
 }
 EXPORT_SYMBOL(generic_ci_d_hash);
+
+static const struct dentry_operations generic_ci_dentry_ops = {
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
+};
+#endif
+
+#ifdef CONFIG_FS_ENCRYPTION
+static const struct dentry_operations generic_encrypted_dentry_ops = {
+   .d_revalidate = fscrypt_d_revalidate,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
+   .d_revalidate = fscrypt_d_revalidate,
+};
+#endif
+
+/**
+ * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry
+ * @dentry:dentry to set ops on
+ *
+ * Casefolded directories need d_hash and d_compare set, so that the dentries
+ * contained in them are handled case-insensitively.  Note that these 
operations
+ * are needed on the parent directory rather than on the dentries in it, and
+ * while the casefolding flag can be toggled on and off on an empty directory,
+ * dentry_operations can't be changed later.  As a result, if the filesystem 
has
+ * casefolding support enabled at all, we have to give all dentries the
+ * casefolding operations even if their inode doesn't have the casefolding flag
+ * currently (and thus the casefolding ops would be no-ops for now).
+ *
+ * Encryption works differently in that the only dentry operation it needs is
+ * d_revalidate, which it only needs on dentries that have the no-key name 
flag.
+ * The no-key flag can't be set "later", so we don't have to worry about that.
+ *
+ * Finally, to maximize compatibility with overlayfs (which isn't compatible
+ * with certain dentry operations) and to avoid taking an unnecessary
+ * performance hit, we use custom dentry_operations for each possible
+ * combination rather than always installing all operations.
+ */
+void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
+{
+#ifdef CONFIG_FS_ENCRYPTION
+   bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME;
+#endif
+#ifdef CONFIG_UNICODE
+   bool needs_ci_ops = dentry->d_sb->s_encoding;
+#endif
+#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
+   if (needs_encrypt_ops && needs_ci_ops) {
+   d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
+   return;
+   }
 #endif
+#ifdef CONFIG_FS_ENCRYPTION
+   if (needs_encrypt_ops) {
+   d_set_d_op(dentry, &generic_encrypted_dentry_ops);
+   return;
+   }
+#endif
+#ifdef CONFIG_UNICODE
+   if (needs_ci_ops) {
+   d_set_d_op(dentry, &generic_ci_dentry_ops);
+   return;
+   }
+#endif
+}
+EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8667d0cdc71e..11345e66353b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3202,6 +3202,7 @@ extern int generic_ci_d_hash(const struct dentry *dentry, 
struct qstr *str);
 extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name);
 #endif
+extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry);
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-- 
2.29.2.454.gaff20da3a2-goog



[PATCH v3 0/3] Add support for Encryption and Casefolding in F2FS

2020-11-17 Thread Daniel Rosenberg
These patches are on top of the torvalds tree.

F2FS currently supports casefolding and encryption, but not at
the same time. These patches aim to rectify that. In a later follow up,
this will be added for Ext4 as well.

The f2fs-tools changes have already been applied.

Since both fscrypt and casefolding require their own dentry operations,
I've moved the responsibility of setting the dentry operations from fscrypt
to the filesystems and provided helper functions that should work for most
cases.

These are a follow-up to the previously sent patch set
"[PATCH v12 0/4] Prepare for upcoming Casefolding/Encryption patches"

v2:
Simplified generic dentry_op function
Passed through errors in f2fs_match_ci_name

v3:
Split some long lines
Cleaned up some code
Made some comments clearer
Fixed bug in v2 error passing

Daniel Rosenberg (3):
  libfs: Add generic function for setting dentry_ops
  fscrypt: Have filesystems handle their d_ops
  f2fs: Handle casefolding with Encryption

 fs/crypto/fname.c   |   4 --
 fs/crypto/hooks.c   |   1 -
 fs/ext4/dir.c   |   7 ---
 fs/ext4/ext4.h  |   4 --
 fs/ext4/namei.c |   1 +
 fs/ext4/super.c |   5 --
 fs/f2fs/dir.c   | 105 +---
 fs/f2fs/f2fs.h  |  11 ++---
 fs/f2fs/hash.c  |  11 -
 fs/f2fs/inline.c|   4 ++
 fs/f2fs/namei.c |   1 +
 fs/f2fs/recovery.c  |  12 -
 fs/f2fs/super.c |   7 ---
 fs/libfs.c  |  70 +++
 fs/ubifs/dir.c  |   1 +
 include/linux/fs.h  |   1 +
 include/linux/fscrypt.h |   7 ++-
 17 files changed, 185 insertions(+), 67 deletions(-)


base-commit: 0fa8ee0d9ab95c9350b8b84574824d9a384a9f7d
-- 
2.29.2.454.gaff20da3a2-goog



Re: [PATCH v2 3/3] f2fs: Handle casefolding with Encryption

2020-11-17 Thread Daniel Rosenberg
On Tue, Nov 17, 2020 at 10:50 AM Eric Biggers  wrote:
>
>
> What is the assignment to dentry_page supposed to be accomplishing?  It looks
> like it's meant to pass up errors from f2fs_find_target_dentry(), but it 
> doesn't
> do that.

Woops. Fixed that for the next version.

>
> > @@ -222,14 +250,20 @@ static bool f2fs_match_ci_name(const struct inode 
> > *dir, const struct qstr *name,
> >* fall back to treating them as opaque byte sequences.
> >*/
> >   if (sb_has_strict_encoding(sb) || name->len != entry.len)
> > - return false;
> > - return !memcmp(name->name, entry.name, name->len);
> > + res = 0;
> > + else
> > + res = memcmp(name->name, entry.name, name->len) == 0;
> > + } else {
> > + /* utf8_strncasecmp_folded returns 0 on match */
> > + res = (res == 0);
> >   }
>
> The following might be easier to understand:
>
> /*
>  * In strict mode, ignore invalid names.  In non-strict mode, fall 
> back
>  * to treating them as opaque byte sequences.
>  */
> if (res < 0 && !sb_has_strict_encoding(sb)) {
> res = name->len == entry.len &&
>   memcmp(name->name, entry.name, name->len) == 0;
> } else {
> /* utf8_strncasecmp_folded returns 0 on match */
> res = (res == 0);
> }
>
Thanks, that is a fair bit nicer.

> > @@ -273,10 +308,14 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const 
> > struct f2fs_dentry_ptr *d,
> >   continue;
> >   }
> >
> > - if (de->hash_code == fname->hash &&
> > - f2fs_match_name(d->inode, fname, d->filename[bit_pos],
> > - le16_to_cpu(de->name_len)))
> > - goto found;
> > + if (de->hash_code == fname->hash) {
> > + res = f2fs_match_name(d->inode, fname, 
> > d->filename[bit_pos],
> > + le16_to_cpu(de->name_len));
> > + if (res < 0)
> > + return ERR_PTR(res);
> > + else if (res)
> > + goto found;
> > + }
>
> Overly long line here.  Also 'else if' is unnecessary, just use 'if'.
>
> - Eric
The 0 case is important, since that reflects that the name was not found.
-Daniel


[PATCH v2 0/3] Add support for Encryption and Casefolding in F2FS

2020-11-16 Thread Daniel Rosenberg
These patches are on top of the torvalds tree.

F2FS currently supports casefolding and encryption, but not at
the same time. These patches aim to rectify that. In a later follow up,
this will be added for Ext4 as well. I've included one ext4 patch from
the previous set since it isn't in the f2fs branch, but is needed for the
fscrypt changes.

The f2fs-tools changes have already been applied.

Since both fscrypt and casefolding require their own dentry operations,
I've moved the responsibility of setting the dentry operations from fscrypt
to the filesystems and provided helper functions that should work for most
cases.

These are a follow-up to the previously sent patch set
"[PATCH v12 0/4] Prepare for upcoming Casefolding/Encryption patches"

v2:
Simplified generic dentry_op function
Passed through errors in f2fs_match_ci_name

Daniel Rosenberg (3):
  libfs: Add generic function for setting dentry_ops
  fscrypt: Have filesystems handle their d_ops
  f2fs: Handle casefolding with Encryption

 fs/crypto/fname.c   |  4 --
 fs/crypto/hooks.c   |  1 -
 fs/ext4/dir.c   |  7 ---
 fs/ext4/ext4.h  |  4 --
 fs/ext4/namei.c |  1 +
 fs/ext4/super.c |  5 ---
 fs/f2fs/dir.c   | 96 +++--
 fs/f2fs/f2fs.h  | 11 +++--
 fs/f2fs/hash.c  | 11 -
 fs/f2fs/inline.c|  4 ++
 fs/f2fs/namei.c |  1 +
 fs/f2fs/recovery.c  | 12 +-
 fs/f2fs/super.c |  7 ---
 fs/libfs.c  | 60 ++
 fs/ubifs/dir.c  |  1 +
 include/linux/fs.h  |  1 +
 include/linux/fscrypt.h |  5 ++-
 17 files changed, 170 insertions(+), 61 deletions(-)

-- 
2.29.2.299.gdc1121823c-goog



[PATCH v2 1/3] libfs: Add generic function for setting dentry_ops

2020-11-16 Thread Daniel Rosenberg
This adds a function to set dentry operations at lookup time that will
work for both encrypted filenames and casefolded filenames.

A filesystem that supports both features simultaneously can use this
function during lookup preparations to set up its dentry operations once
fscrypt no longer does that itself.

Currently the casefolding dentry operations are always set if the
filesystem defines an encoding because the feature is toggleable on
empty directories. Since we don't know what set of functions we'll
eventually need, and cannot change them later, we just add them.

Signed-off-by: Daniel Rosenberg 
---
 fs/libfs.c | 60 ++
 include/linux/fs.h |  1 +
 2 files changed, 61 insertions(+)

diff --git a/fs/libfs.c b/fs/libfs.c
index fc34361c1489..dd8504f3ff5d 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1449,4 +1449,64 @@ int generic_ci_d_hash(const struct dentry *dentry, 
struct qstr *str)
return 0;
 }
 EXPORT_SYMBOL(generic_ci_d_hash);
+
+static const struct dentry_operations generic_ci_dentry_ops = {
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
+};
 #endif
+
+#ifdef CONFIG_FS_ENCRYPTION
+static const struct dentry_operations generic_encrypted_dentry_ops = {
+   .d_revalidate = fscrypt_d_revalidate,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
+   .d_revalidate = fscrypt_d_revalidate,
+};
+#endif
+
+/**
+ * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry
+ * @dentry:dentry to set ops on
+ *
+ * This function sets the dentry ops for the given dentry to handle both
+ * casefolded and encrypted dentry names.
+ *
+ * Encryption requires d_revalidate to remove nokey names once the key is present.
+ * Casefolding is toggleable on an empty directory. Since we can't change the
+ * operations later on, we just add the casefolding ops if the filesystem defines an
+ * encoding.
+ */
+void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
+{
+#ifdef CONFIG_FS_ENCRYPTION
+   bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME;
+#endif
+#ifdef CONFIG_UNICODE
+   bool needs_ci_ops = dentry->d_sb->s_encoding;
+#endif
+#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
+   if (needs_encrypt_ops && needs_ci_ops) {
+   d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
+   return;
+   }
+#endif
+#ifdef CONFIG_FS_ENCRYPTION
+   if (needs_encrypt_ops) {
+   d_set_d_op(dentry, &generic_encrypted_dentry_ops);
+   return;
+   }
+#endif
+#ifdef CONFIG_UNICODE
+   if (needs_ci_ops) {
+   d_set_d_op(dentry, &generic_ci_dentry_ops);
+   return;
+   }
+#endif
+}
+EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8667d0cdc71e..11345e66353b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3202,6 +3202,7 @@ extern int generic_ci_d_hash(const struct dentry *dentry, 
struct qstr *str);
 extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name);
 #endif
+extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry);
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-- 
2.29.2.299.gdc1121823c-goog



[PATCH v2 2/3] fscrypt: Have filesystems handle their d_ops

2020-11-16 Thread Daniel Rosenberg
This shifts the responsibility of setting up dentry operations from
fscrypt to the individual filesystems, allowing them to have their own
operations while still setting fscrypt's d_revalidate as appropriate.

Most filesystems can just use generic_set_encrypted_ci_d_ops, unless
they have their own specific dentry operations as well. That operation
will set the minimal d_ops required under the circumstances.

Since the fscrypt d_ops are set later on, we must set all d_ops there,
since we cannot adjust those later on. This should not result in any
change in behavior.

Signed-off-by: Daniel Rosenberg 
---
 fs/crypto/fname.c   | 4 
 fs/crypto/hooks.c   | 1 -
 fs/ext4/dir.c   | 7 ---
 fs/ext4/ext4.h  | 4 
 fs/ext4/namei.c | 1 +
 fs/ext4/super.c | 5 -
 fs/f2fs/dir.c   | 7 ---
 fs/f2fs/f2fs.h  | 3 ---
 fs/f2fs/namei.c | 1 +
 fs/f2fs/super.c | 1 -
 fs/ubifs/dir.c  | 1 +
 include/linux/fscrypt.h | 5 +++--
 12 files changed, 6 insertions(+), 34 deletions(-)

diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 1fbe6c24d705..cb3cfa6329ba 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -570,7 +570,3 @@ int fscrypt_d_revalidate(struct dentry *dentry, unsigned 
int flags)
return valid;
 }
 EXPORT_SYMBOL_GPL(fscrypt_d_revalidate);
-
-const struct dentry_operations fscrypt_d_ops = {
-   .d_revalidate = fscrypt_d_revalidate,
-};
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 20b0df47fe6a..9006fa983335 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -117,7 +117,6 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct 
dentry *dentry,
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_NOKEY_NAME;
spin_unlock(&dentry->d_lock);
-   d_set_d_op(dentry, &fscrypt_d_ops);
}
return err;
 }
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index ca50c90adc4c..e757319a4472 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -667,10 +667,3 @@ const struct file_operations ext4_dir_operations = {
.open   = ext4_dir_open,
.release= ext4_release_dir,
 };
-
-#ifdef CONFIG_UNICODE
-const struct dentry_operations ext4_dentry_ops = {
-   .d_hash = generic_ci_d_hash,
-   .d_compare = generic_ci_d_compare,
-};
-#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index bf9429484462..ad77f01d9e20 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3380,10 +3380,6 @@ static inline void ext4_unlock_group(struct super_block 
*sb,
 /* dir.c */
 extern const struct file_operations ext4_dir_operations;
 
-#ifdef CONFIG_UNICODE
-extern const struct dentry_operations ext4_dentry_ops;
-#endif
-
 /* file.c */
 extern const struct inode_operations ext4_file_inode_operations;
 extern const struct file_operations ext4_file_operations;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 33509266f5a0..12a417ff5648 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1614,6 +1614,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode 
*dir,
struct buffer_head *bh;
 
err = ext4_fname_prepare_lookup(dir, dentry, );
+   generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
return NULL;
if (err)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6633b20224d5..0288bedf46e1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4968,11 +4968,6 @@ static int ext4_fill_super(struct super_block *sb, void 
*data, int silent)
goto failed_mount4;
}
 
-#ifdef CONFIG_UNICODE
-   if (sb->s_encoding)
-   sb->s_d_op = &ext4_dentry_ops;
-#endif
-
sb->s_root = d_make_root(root);
if (!sb->s_root) {
ext4_msg(sb, KERN_ERR, "get root dentry failed");
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 4b9ef8bbfa4a..71fdf5076461 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1099,10 +1099,3 @@ const struct file_operations f2fs_dir_operations = {
.compat_ioctl   = f2fs_compat_ioctl,
 #endif
 };
-
-#ifdef CONFIG_UNICODE
-const struct dentry_operations f2fs_dentry_ops = {
-   .d_hash = generic_ci_d_hash,
-   .d_compare = generic_ci_d_compare,
-};
-#endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cb700d797296..62b4f31d30e2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3767,9 +3767,6 @@ static inline void f2fs_update_sit_info(struct 
f2fs_sb_info *sbi) {}
 #endif
 
 extern const struct file_operations f2fs_dir_operations;
-#ifdef CONFIG_UNICODE
-extern const struct dentry_operations f2fs_dentry_ops;
-#endif
 extern const struct file_operations f2fs_file_operations;
 extern const struct inode_operations f2fs_file_inode_operations;
 extern const struct address_space_operations f2fs_dblock_aops;
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 8fa37d1434de..6edb1ab579a1 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -497,6 +497,7 @@ static struct dentry

[PATCH v2 3/3] f2fs: Handle casefolding with Encryption

2020-11-16 Thread Daniel Rosenberg
Expand f2fs's casefolding support to include encrypted directories.  To
index casefolded+encrypted directories, we use the SipHash of the
casefolded name, keyed by a key derived from the directory's fscrypt
master key.  This ensures that the dirhash doesn't leak information
about the plaintext filenames.

Encryption keys are unavailable during roll-forward recovery, so we
can't compute the dirhash when recovering a new dentry in an encrypted +
casefolded directory.  To avoid having to force a checkpoint when a new
file is fsync'ed, store the dirhash on-disk appended to i_name.

This patch incorporates work by Eric Biggers
and Jaegeuk Kim.

Co-developed-by: Eric Biggers 
Signed-off-by: Eric Biggers 
Signed-off-by: Daniel Rosenberg 
---
 fs/f2fs/dir.c  | 89 +-
 fs/f2fs/f2fs.h |  8 +++--
 fs/f2fs/hash.c | 11 +-
 fs/f2fs/inline.c   |  4 +++
 fs/f2fs/recovery.c | 12 ++-
 fs/f2fs/super.c|  6 
 6 files changed, 103 insertions(+), 27 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 71fdf5076461..0adc6bcfb5c0 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -5,6 +5,7 @@
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  * http://www.samsung.com/
  */
+#include 
 #include 
 #include 
 #include 
@@ -195,26 +196,53 @@ static struct f2fs_dir_entry *find_in_block(struct inode 
*dir,
 {
struct f2fs_dentry_block *dentry_blk;
struct f2fs_dentry_ptr d;
+   struct f2fs_dir_entry *res;
 
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
 
make_dentry_ptr_block(dir, &d, dentry_blk);
-   return f2fs_find_target_dentry(&d, fname, max_slots);
+   res = f2fs_find_target_dentry(&d, fname, max_slots);
+   if (IS_ERR(res)) {
+   dentry_page = ERR_CAST(res);
+   res = NULL;
+   }
+   return res;
 }
 
 #ifdef CONFIG_UNICODE
 /*
  * Test whether a case-insensitive directory entry matches the filename
  * being searched for.
+ *
+ * Returns 1 for a match, 0 for no match, and -errno on an error.
  */
-static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr 
*name,
+static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
   const u8 *de_name, u32 de_name_len)
 {
const struct super_block *sb = dir->i_sb;
const struct unicode_map *um = sb->s_encoding;
+   struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
struct qstr entry = QSTR_INIT(de_name, de_name_len);
int res;
 
+   if (IS_ENCRYPTED(dir)) {
+   const struct fscrypt_str encrypted_name =
+   FSTR_INIT((u8 *)de_name, de_name_len);
+
+   if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir)))
+   return -EINVAL;
+
+   decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+   if (!decrypted_name.name)
+   return -ENOMEM;
+   res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name,
+   &decrypted_name);
+   if (res < 0)
+   goto out;
+   entry.name = decrypted_name.name;
+   entry.len = decrypted_name.len;
+   }
+
res = utf8_strncasecmp_folded(um, name, &entry);
if (res < 0) {
/*
@@ -222,14 +250,20 @@ static bool f2fs_match_ci_name(const struct inode *dir, 
const struct qstr *name,
 * fall back to treating them as opaque byte sequences.
 */
if (sb_has_strict_encoding(sb) || name->len != entry.len)
-   return false;
-   return !memcmp(name->name, entry.name, name->len);
+   res = 0;
+   else
+   res = memcmp(name->name, entry.name, name->len) == 0;
+   } else {
+   /* utf8_strncasecmp_folded returns 0 on match */
+   res = (res == 0);
}
-   return res == 0;
+out:
+   kfree(decrypted_name.name);
+   return res;
 }
 #endif /* CONFIG_UNICODE */
 
-static inline bool f2fs_match_name(const struct inode *dir,
+static inline int f2fs_match_name(const struct inode *dir,
   const struct f2fs_filename *fname,
   const u8 *de_name, u32 de_name_len)
 {
@@ -256,6 +290,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct 
f2fs_dentry_ptr *d,
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
int max_len = 0;
+   int res = 0;
 
if (max_slots)
*max_slots = 0;
@@ -273,10 +308,14 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const 
struct f2fs_dentry_ptr *d,
continue;
}
 
-   if (de->hash_code == fname->hash &&
-   f2fs_match_

[PATCH] ext4: Use generic casefolding support

2020-10-28 Thread Daniel Rosenberg
This switches ext4 over to the generic support provided in libfs.

Since casefolded dentries behave the same in ext4 and f2fs, we decrease
the maintenance burden by unifying them, and any optimizations will
immediately apply to both.

Signed-off-by: Daniel Rosenberg 
Reviewed-by: Eric Biggers 
---
 fs/ext4/dir.c   | 64 ++---
 fs/ext4/ext4.h  | 12 --
 fs/ext4/hash.c  |  2 +-
 fs/ext4/namei.c | 20 +++-
 fs/ext4/super.c | 12 +-
 5 files changed, 17 insertions(+), 93 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 5b81f3b080ee..ca50c90adc4c 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -669,68 +669,8 @@ const struct file_operations ext4_dir_operations = {
 };
 
 #ifdef CONFIG_UNICODE
-static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
-   struct qstr qstr = {.name = str, .len = len };
-   const struct dentry *parent = READ_ONCE(dentry->d_parent);
-   const struct inode *inode = d_inode_rcu(parent);
-   char strbuf[DNAME_INLINE_LEN];
-
-   if (!inode || !IS_CASEFOLDED(inode) ||
-   !EXT4_SB(inode->i_sb)->s_encoding) {
-   if (len != name->len)
-   return -1;
-   return memcmp(str, name->name, len);
-   }
-
-   /*
-* If the dentry name is stored in-line, then it may be concurrently
-* modified by a rename.  If this happens, the VFS will eventually retry
-* the lookup, so it doesn't matter what ->d_compare() returns.
-* However, it's unsafe to call utf8_strncasecmp() with an unstable
-* string.  Therefore, we have to copy the name into a temporary buffer.
-*/
-   if (len <= DNAME_INLINE_LEN - 1) {
-   memcpy(strbuf, str, len);
-   strbuf[len] = 0;
-   qstr.name = strbuf;
-   /* prevent compiler from optimizing out the temporary buffer */
-   barrier();
-   }
-
-   return ext4_ci_compare(inode, name, &qstr, false);
-}
-
-static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
-{
-   const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
-   const struct unicode_map *um = sbi->s_encoding;
-   const struct inode *inode = d_inode_rcu(dentry);
-   unsigned char *norm;
-   int len, ret = 0;
-
-   if (!inode || !IS_CASEFOLDED(inode) || !um)
-   return 0;
-
-   norm = kmalloc(PATH_MAX, GFP_ATOMIC);
-   if (!norm)
-   return -ENOMEM;
-
-   len = utf8_casefold(um, str, norm, PATH_MAX);
-   if (len < 0) {
-   if (ext4_has_strict_mode(sbi))
-   ret = -EINVAL;
-   goto out;
-   }
-   str->hash = full_name_hash(dentry, norm, len);
-out:
-   kfree(norm);
-   return ret;
-}
-
 const struct dentry_operations ext4_dentry_ops = {
-   .d_hash = ext4_d_hash,
-   .d_compare = ext4_d_compare,
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
 };
 #endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 254d1c26bea8..662d0f40431e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1443,14 +1443,6 @@ struct ext4_super_block {
 
 #define EXT4_ENC_UTF8_12_1 1
 
-/*
- * Flags for ext4_sb_info.s_encoding_flags.
- */
-#define EXT4_ENC_STRICT_MODE_FL(1 << 0)
-
-#define ext4_has_strict_mode(sbi) \
-   (sbi->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL)
-
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -1500,10 +1492,6 @@ struct ext4_sb_info {
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
-#ifdef CONFIG_UNICODE
-   struct unicode_map *s_encoding;
-   __u16 s_encoding_flags;
-#endif
 
/* Journaling */
struct journal_s *s_journal;
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 2924261226e0..a92eb79de0cc 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -275,7 +275,7 @@ int ext4fs_dirhash(const struct inode *dir, const char 
*name, int len,
   struct dx_hash_info *hinfo)
 {
 #ifdef CONFIG_UNICODE
-   const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;
+   const struct unicode_map *um = dir->i_sb->s_encoding;
int r, dlen;
unsigned char *buff;
struct qstr qstr = {.name = name, .len = len };
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5159830dacb8..f458d1d81d96 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1285,8 +1285,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 
hash, ext4_lblk_t block)
 int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
const struct qstr *entry, bool quick)
 {
-   const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
-   const struct unicode_map *um = sbi->

[PATCH 5/5] f2fs: Handle casefolding with Encryption

2020-09-22 Thread Daniel Rosenberg
Expand f2fs's casefolding support to include encrypted directories.  To
index casefolded+encrypted directories, we use the SipHash of the
casefolded name, keyed by a key derived from the directory's fscrypt
master key.  This ensures that the dirhash doesn't leak information
about the plaintext filenames.

Encryption keys are unavailable during roll-forward recovery, so we
can't compute the dirhash when recovering a new dentry in an encrypted +
casefolded directory.  To avoid having to force a checkpoint when a new
file is fsync'ed, store the dirhash on-disk appended to i_name.

This patch incorporates work by Eric Biggers
and Jaegeuk Kim.

Co-developed-by: Eric Biggers 
Signed-off-by: Eric Biggers 
Signed-off-by: Daniel Rosenberg 
---
 fs/f2fs/dir.c  | 57 +++---
 fs/f2fs/f2fs.h |  8 ---
 fs/f2fs/hash.c | 11 -
 fs/f2fs/recovery.c | 12 +-
 fs/f2fs/super.c|  6 -
 5 files changed, 75 insertions(+), 19 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 0766e6250a88..07004eb6edf8 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -5,6 +5,7 @@
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  * http://www.samsung.com/
  */
+#include 
 #include 
 #include 
 #include 
@@ -218,9 +219,28 @@ static bool f2fs_match_ci_name(const struct inode *dir, 
const struct qstr *name,
 {
const struct super_block *sb = dir->i_sb;
const struct unicode_map *um = sb->s_encoding;
+   struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
struct qstr entry = QSTR_INIT(de_name, de_name_len);
int res;
 
+   if (IS_ENCRYPTED(dir)) {
+   const struct fscrypt_str encrypted_name =
+   FSTR_INIT((u8 *)de_name, de_name_len);
+
+   if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir)))
+   return false;
+
+   decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+   if (!decrypted_name.name)
+   return false;
+   res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name,
+   &decrypted_name);
+   if (res < 0)
+   goto out;
+   entry.name = decrypted_name.name;
+   entry.len = decrypted_name.len;
+   }
+
res = utf8_strncasecmp_folded(um, name, &entry);
if (res < 0) {
/*
@@ -228,9 +248,12 @@ static bool f2fs_match_ci_name(const struct inode *dir, 
const struct qstr *name,
 * fall back to treating them as opaque byte sequences.
 */
if (sb_has_strict_encoding(sb) || name->len != entry.len)
-   return false;
-   return !memcmp(name->name, entry.name, name->len);
+   res = 1;
+   else
+   res = memcmp(name->name, entry.name, name->len);
}
+out:
+   kfree(decrypted_name.name);
return res == 0;
 }
 #endif /* CONFIG_UNICODE */
@@ -455,17 +478,39 @@ void f2fs_set_link(struct inode *dir, struct 
f2fs_dir_entry *de,
f2fs_put_page(page, 1);
 }
 
-static void init_dent_inode(const struct f2fs_filename *fname,
+static void init_dent_inode(struct inode *dir, struct inode *inode,
+   const struct f2fs_filename *fname,
struct page *ipage)
 {
struct f2fs_inode *ri;
 
+   if (!fname) /* tmpfile case? */
+   return;
+
f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 
/* copy name info. to this inode page */
ri = F2FS_INODE(ipage);
ri->i_namelen = cpu_to_le32(fname->disk_name.len);
memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len);
+   if (IS_ENCRYPTED(dir)) {
+   file_set_enc_name(inode);
+   /*
+* Roll-forward recovery doesn't have encryption keys available,
+* so it can't compute the dirhash for encrypted+casefolded
+* filenames.  Append it to i_name if possible.  Else, disable
+* roll-forward recovery of the dentry (i.e., make fsync'ing the
+* file force a checkpoint) by setting LOST_PINO.
+*/
+   if (IS_CASEFOLDED(dir)) {
+   if (fname->disk_name.len + sizeof(f2fs_hash_t) <=
+   F2FS_NAME_LEN)
+   put_unaligned(fname->hash, (f2fs_hash_t *)
+   &ri->i_name[fname->disk_name.len]);
+   else
+   file_lost_pino(inode);
+   }
+   }
set_page_dirty(ipage);
 }
 
@@ -548,11 +593,7 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, 
struct inode *dir,
return page;
}
 
-  

[PATCH 1/5] ext4: Use generic casefolding support

2020-09-22 Thread Daniel Rosenberg
This switches ext4 over to the generic support provided in
the previous patch.

Since casefolded dentries behave the same in ext4 and f2fs, we decrease
the maintenance burden by unifying them, and any optimizations will
immediately apply to both.

Signed-off-by: Daniel Rosenberg 
Reviewed-by: Eric Biggers 
---
 fs/ext4/dir.c   | 64 ++---
 fs/ext4/ext4.h  | 12 --
 fs/ext4/hash.c  |  2 +-
 fs/ext4/namei.c | 20 +++-
 fs/ext4/super.c | 12 +-
 5 files changed, 17 insertions(+), 93 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d82336b1cd4..b437120f0b3f 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -669,68 +669,8 @@ const struct file_operations ext4_dir_operations = {
 };
 
 #ifdef CONFIG_UNICODE
-static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
-   struct qstr qstr = {.name = str, .len = len };
-   const struct dentry *parent = READ_ONCE(dentry->d_parent);
-   const struct inode *inode = READ_ONCE(parent->d_inode);
-   char strbuf[DNAME_INLINE_LEN];
-
-   if (!inode || !IS_CASEFOLDED(inode) ||
-   !EXT4_SB(inode->i_sb)->s_encoding) {
-   if (len != name->len)
-   return -1;
-   return memcmp(str, name->name, len);
-   }
-
-   /*
-* If the dentry name is stored in-line, then it may be concurrently
-* modified by a rename.  If this happens, the VFS will eventually retry
-* the lookup, so it doesn't matter what ->d_compare() returns.
-* However, it's unsafe to call utf8_strncasecmp() with an unstable
-* string.  Therefore, we have to copy the name into a temporary buffer.
-*/
-   if (len <= DNAME_INLINE_LEN - 1) {
-   memcpy(strbuf, str, len);
-   strbuf[len] = 0;
-   qstr.name = strbuf;
-   /* prevent compiler from optimizing out the temporary buffer */
-   barrier();
-   }
-
-   return ext4_ci_compare(inode, name, &qstr, false);
-}
-
-static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
-{
-   const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
-   const struct unicode_map *um = sbi->s_encoding;
-   const struct inode *inode = READ_ONCE(dentry->d_inode);
-   unsigned char *norm;
-   int len, ret = 0;
-
-   if (!inode || !IS_CASEFOLDED(inode) || !um)
-   return 0;
-
-   norm = kmalloc(PATH_MAX, GFP_ATOMIC);
-   if (!norm)
-   return -ENOMEM;
-
-   len = utf8_casefold(um, str, norm, PATH_MAX);
-   if (len < 0) {
-   if (ext4_has_strict_mode(sbi))
-   ret = -EINVAL;
-   goto out;
-   }
-   str->hash = full_name_hash(dentry, norm, len);
-out:
-   kfree(norm);
-   return ret;
-}
-
 const struct dentry_operations ext4_dentry_ops = {
-   .d_hash = ext4_d_hash,
-   .d_compare = ext4_d_compare,
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
 };
 #endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 523e00d7b392..5df0fbd6add4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1411,14 +1411,6 @@ struct ext4_super_block {
 
 #define EXT4_ENC_UTF8_12_1 1
 
-/*
- * Flags for ext4_sb_info.s_encoding_flags.
- */
-#define EXT4_ENC_STRICT_MODE_FL(1 << 0)
-
-#define ext4_has_strict_mode(sbi) \
-   (sbi->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL)
-
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -1468,10 +1460,6 @@ struct ext4_sb_info {
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
-#ifdef CONFIG_UNICODE
-   struct unicode_map *s_encoding;
-   __u16 s_encoding_flags;
-#endif
 
/* Journaling */
struct journal_s *s_journal;
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 2924261226e0..a92eb79de0cc 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -275,7 +275,7 @@ int ext4fs_dirhash(const struct inode *dir, const char 
*name, int len,
   struct dx_hash_info *hinfo)
 {
 #ifdef CONFIG_UNICODE
-   const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;
+   const struct unicode_map *um = dir->i_sb->s_encoding;
int r, dlen;
unsigned char *buff;
struct qstr qstr = {.name = name, .len = len };
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 153a9fbe1dd0..ea7dee80c8a4 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1286,8 +1286,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 
hash, ext4_lblk_t block)
 int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
const struct qstr *entry, bool quick)
 {
-   const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
-   c

[PATCH 4/5] fscrypt: Have filesystems handle their d_ops

2020-09-22 Thread Daniel Rosenberg
This shifts the responsibility of setting up dentry operations from
fscrypt to the individual filesystems, allowing them to have their own
operations while still setting fscrypt's d_revalidate as appropriate.

Most filesystems can just use generic_set_encrypted_ci_d_ops, unless
they have their own specific dentry operations as well. That operation
will set the minimal d_ops required under the circumstances.

Since the fscrypt d_ops are set later on, we must set all d_ops there,
since we cannot adjust those later on. This should not result in any
change in behavior.

Signed-off-by: Daniel Rosenberg 
---
 fs/crypto/fname.c   | 4 
 fs/crypto/hooks.c   | 1 -
 fs/ext4/dir.c   | 7 ---
 fs/ext4/ext4.h  | 4 
 fs/ext4/namei.c | 1 +
 fs/ext4/super.c | 5 -
 fs/f2fs/dir.c   | 7 ---
 fs/f2fs/f2fs.h  | 3 ---
 fs/f2fs/namei.c | 1 +
 fs/f2fs/super.c | 1 -
 fs/ubifs/dir.c  | 1 +
 include/linux/fscrypt.h | 5 +++--
 12 files changed, 6 insertions(+), 34 deletions(-)

diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index d45db23ff6c4..efa942e3ab53 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -581,7 +581,3 @@ int fscrypt_d_revalidate(struct dentry *dentry, unsigned 
int flags)
return valid;
 }
 EXPORT_SYMBOL_GPL(fscrypt_d_revalidate);
-
-const struct dentry_operations fscrypt_d_ops = {
-   .d_revalidate = fscrypt_d_revalidate,
-};
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 09fb8aa0f2e9..7d6898ca152a 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -118,7 +118,6 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct 
dentry *dentry,
spin_lock(&dentry->d_lock);
dentry->d_flags |= DCACHE_ENCRYPTED_NAME;
spin_unlock(&dentry->d_lock);
-   d_set_d_op(dentry, &fscrypt_d_ops);
}
return err;
 }
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index b437120f0b3f..f0135042c2ad 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -667,10 +667,3 @@ const struct file_operations ext4_dir_operations = {
.open   = ext4_dir_open,
.release= ext4_release_dir,
 };
-
-#ifdef CONFIG_UNICODE
-const struct dentry_operations ext4_dentry_ops = {
-   .d_hash = generic_ci_d_hash,
-   .d_compare = generic_ci_d_compare,
-};
-#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5df0fbd6add4..cbde8447eddd 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3247,10 +3247,6 @@ static inline void ext4_unlock_group(struct super_block 
*sb,
 /* dir.c */
 extern const struct file_operations ext4_dir_operations;
 
-#ifdef CONFIG_UNICODE
-extern const struct dentry_operations ext4_dentry_ops;
-#endif
-
 /* file.c */
 extern const struct inode_operations ext4_file_inode_operations;
 extern const struct file_operations ext4_file_operations;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index ea7dee80c8a4..592ea2f8ea19 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1615,6 +1615,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode 
*dir,
struct buffer_head *bh;
 
err = ext4_fname_prepare_lookup(dir, dentry, );
+   generic_set_encrypted_ci_d_ops(dentry);
if (err == -ENOENT)
return NULL;
if (err)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8a261a6bb608..ce67540bd882 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4719,11 +4719,6 @@ static int ext4_fill_super(struct super_block *sb, void 
*data, int silent)
goto failed_mount4;
}
 
-#ifdef CONFIG_UNICODE
-   if (sb->s_encoding)
-   sb->s_d_op = &ext4_dentry_ops;
-#endif
-
sb->s_root = d_make_root(root);
if (!sb->s_root) {
ext4_msg(sb, KERN_ERR, "get root dentry failed");
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index a18f839b6fb2..0766e6250a88 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1106,10 +1106,3 @@ const struct file_operations f2fs_dir_operations = {
.compat_ioctl   = f2fs_compat_ioctl,
 #endif
 };
-
-#ifdef CONFIG_UNICODE
-const struct dentry_operations f2fs_dentry_ops = {
-   .d_hash = generic_ci_d_hash,
-   .d_compare = generic_ci_d_compare,
-};
-#endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 61fd78b1b1bd..af1d469e8c1e 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3774,9 +3774,6 @@ static inline void f2fs_update_sit_info(struct 
f2fs_sb_info *sbi) {}
 #endif
 
 extern const struct file_operations f2fs_dir_operations;
-#ifdef CONFIG_UNICODE
-extern const struct dentry_operations f2fs_dentry_ops;
-#endif
 extern const struct file_operations f2fs_file_operations;
 extern const struct inode_operations f2fs_file_inode_operations;
 extern const struct address_space_operations f2fs_dblock_aops;
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 90565432559c..70a8e516fd32 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -492,6 +492,7 @@ static struct d

[PATCH 2/5] fscrypt: Export fscrypt_d_revalidate

2020-09-22 Thread Daniel Rosenberg
This is in preparation for shifting the responsibility of setting the
dentry_operations to the filesystem, allowing it to maintain its own
operations.

Signed-off-by: Daniel Rosenberg 
---
 fs/crypto/fname.c   | 3 ++-
 include/linux/fscrypt.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 011830f84d8d..d45db23ff6c4 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -541,7 +541,7 @@ EXPORT_SYMBOL_GPL(fscrypt_fname_siphash);
  * Validate dentries in encrypted directories to make sure we aren't 
potentially
  * caching stale dentries after a key has been added.
  */
-static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
+int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
 {
struct dentry *dir;
int err;
@@ -580,6 +580,7 @@ static int fscrypt_d_revalidate(struct dentry *dentry, 
unsigned int flags)
 
return valid;
 }
+EXPORT_SYMBOL_GPL(fscrypt_d_revalidate);
 
 const struct dentry_operations fscrypt_d_ops = {
.d_revalidate = fscrypt_d_revalidate,
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index 991ff8575d0e..265b1e9119dc 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -207,6 +207,7 @@ int fscrypt_fname_disk_to_usr(const struct inode *inode,
 bool fscrypt_match_name(const struct fscrypt_name *fname,
const u8 *de_name, u32 de_name_len);
 u64 fscrypt_fname_siphash(const struct inode *dir, const struct qstr *name);
+extern int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags);
 
 /* bio.c */
 void fscrypt_decrypt_bio(struct bio *bio);
-- 
2.28.0.681.g6f77f65b4e-goog



[PATCH 0/5] Add support for Encryption and Casefolding in F2FS

2020-09-22 Thread Daniel Rosenberg
These patches are on top of the f2fs dev branch

F2FS currently supports casefolding and encryption, but not at
the same time. These patches aim to rectify that. In a later follow up,
this will be added for Ext4 as well. I've included one ext4 patch from
the previous set since it isn't in the f2fs branch, but is needed for the
fscrypt changes.

The f2fs-tools changes have already been applied.

Since both fscrypt and casefolding require their own dentry operations,
I've moved the responsibility of setting the dentry operations from fscrypt
to the filesystems and provided helper functions that should work for most
cases.

These are a follow-up to the previously sent patch set
"[PATCH v12 0/4] Prepare for upcoming Casefolding/Encryption patches"

Daniel Rosenberg (5):
  ext4: Use generic casefolding support
  fscrypt: Export fscrypt_d_revalidate
  libfs: Add generic function for setting dentry_ops
  fscrypt: Have filesystems handle their d_ops
  f2fs: Handle casefolding with Encryption

 fs/crypto/fname.c   |  7 ++---
 fs/crypto/hooks.c   |  1 -
 fs/ext4/dir.c   | 67 -
 fs/ext4/ext4.h  | 16 --
 fs/ext4/hash.c  |  2 +-
 fs/ext4/namei.c | 21 ++---
 fs/ext4/super.c | 15 +++--
 fs/f2fs/dir.c   | 64 ++-
 fs/f2fs/f2fs.h  | 11 +++
 fs/f2fs/hash.c  | 11 ++-
 fs/f2fs/namei.c |  1 +
 fs/f2fs/recovery.c  | 12 +++-
 fs/f2fs/super.c |  7 -
 fs/libfs.c  | 49 ++
 fs/ubifs/dir.c  |  1 +
 include/linux/fs.h  |  1 +
 include/linux/fscrypt.h |  6 ++--
 17 files changed, 148 insertions(+), 144 deletions(-)

-- 
2.28.0.681.g6f77f65b4e-goog



[PATCH 3/5] libfs: Add generic function for setting dentry_ops

2020-09-22 Thread Daniel Rosenberg
This adds a function to set dentry operations at lookup time that will
work for both encrypted files and casefolded filenames.

A filesystem that supports both features simultaneously can use this
function during lookup preparations to set up its dentry operations once
fscrypt no longer does that itself.

Currently the casefolding dentry operations are always set because the
feature is toggleable on empty directories. Since we don't know what
set of functions we'll eventually need, and cannot change them later,
we just add them.

Signed-off-by: Daniel Rosenberg 
---
 fs/libfs.c | 49 ++
 include/linux/fs.h |  1 +
 2 files changed, 50 insertions(+)

diff --git a/fs/libfs.c b/fs/libfs.c
index fc34361c1489..83303858f1fe 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1449,4 +1449,53 @@ int generic_ci_d_hash(const struct dentry *dentry, 
struct qstr *str)
return 0;
 }
 EXPORT_SYMBOL(generic_ci_d_hash);
+
+static const struct dentry_operations generic_ci_dentry_ops = {
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
+};
+#endif
+
+#ifdef CONFIG_FS_ENCRYPTION
+static const struct dentry_operations generic_encrypted_dentry_ops = {
+   .d_revalidate = fscrypt_d_revalidate,
+};
+#endif
+
+#if IS_ENABLED(CONFIG_UNICODE) && IS_ENABLED(CONFIG_FS_ENCRYPTION)
+static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
+   .d_revalidate = fscrypt_d_revalidate,
+};
+#endif
+
+/**
+ * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry
+ * @dentry:dentry to set ops on
+ *
+ * This function sets the dentry ops for the given dentry to handle both
+ * casefolding and encryption of the dentry name.
+ */
+void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
+{
+#ifdef CONFIG_FS_ENCRYPTION
+   if (dentry->d_flags & DCACHE_ENCRYPTED_NAME) {
+#ifdef CONFIG_UNICODE
+   if (dentry->d_sb->s_encoding) {
+   d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
+   return;
+   }
 #endif
+   d_set_d_op(dentry, &generic_encrypted_dentry_ops);
+   return;
+   }
+#endif
+#ifdef CONFIG_UNICODE
+   if (dentry->d_sb->s_encoding) {
+   d_set_d_op(dentry, &generic_ci_dentry_ops);
+   return;
+   }
+#endif
+}
+EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bc5417c61e12..6627896db835 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3277,6 +3277,7 @@ extern int generic_ci_d_hash(const struct dentry *dentry, 
struct qstr *str);
 extern int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
const char *str, const struct qstr *name);
 #endif
+extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry);
 
 #ifdef CONFIG_MIGRATION
 extern int buffer_migrate_page(struct address_space *,
-- 
2.28.0.681.g6f77f65b4e-goog



[PATCH 0/5] Add support for Encryption and Casefolding in F2FS

2020-09-22 Thread Daniel Rosenberg
These patches are on top of the f2fs dev branch

F2FS currently supports casefolding and encryption, but not at
the same time. These patches aim to rectify that. In a later follow up,
this will be added for Ext4 as well. I've included one ext4 patch from
the previous set since it isn't in the f2fs branch, but is needed for the
fscrypt changes.

The f2fs-tools changes have already been applied.

Since both fscrypt and casefolding require their own dentry operations,
I've moved the responsibility of setting the dentry operations from fscrypt
to the filesystems and provided helper functions that should work for most
cases.

These are a follow-up to the previously sent patch set
"[PATCH v12 0/4] Prepare for upcoming Casefolding/Encryption patches"

Daniel Rosenberg (5):
  ext4: Use generic casefolding support
  fscrypt: Export fscrypt_d_revalidate
  libfs: Add generic function for setting dentry_ops
  fscrypt: Have filesystems handle their d_ops
  f2fs: Handle casefolding with Encryption

 fs/crypto/fname.c   |  7 ++---
 fs/crypto/hooks.c   |  1 -
 fs/ext4/dir.c   | 67 -
 fs/ext4/ext4.h  | 16 --
 fs/ext4/hash.c  |  2 +-
 fs/ext4/namei.c | 21 ++---
 fs/ext4/super.c | 15 +++--
 fs/f2fs/dir.c   | 64 ++-
 fs/f2fs/f2fs.h  | 11 +++
 fs/f2fs/hash.c  | 11 ++-
 fs/f2fs/namei.c |  1 +
 fs/f2fs/recovery.c  | 12 +++-
 fs/f2fs/super.c |  7 -
 fs/libfs.c  | 49 ++
 fs/ubifs/dir.c  |  1 +
 include/linux/fs.h  |  1 +
 include/linux/fscrypt.h |  6 ++--
 17 files changed, 148 insertions(+), 144 deletions(-)

-- 
2.28.0.681.g6f77f65b4e-goog



Re: [PATCH v12 4/4] ext4: Use generic casefolding support

2020-09-22 Thread Daniel Rosenberg
On Mon, Sep 21, 2020 at 11:29 AM Eric Biggers  wrote:
>
> On Sun, Sep 20, 2020 at 09:10:57PM -0400, Gabriel Krisman Bertazi wrote:
> > Daniel Rosenberg  writes:
> >
> > > This switches ext4 over to the generic support provided in
> > > the previous patch.
> > >
> > > Since casefolded dentries behave the same in ext4 and f2fs, we decrease
> > > the maintenance burden by unifying them, and any optimizations will
> > > immediately apply to both.
> > >
> > > Signed-off-by: Daniel Rosenberg 
> > > Reviewed-by: Eric Biggers 
> > >
> > >  #ifdef CONFIG_UNICODE
> > > -   if (EXT4_SB(parent->i_sb)->s_encoding && IS_CASEFOLDED(parent)) {
> > > +   if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent)) {
> > > if (fname->cf_name.name) {
> > > struct qstr cf = {.name = fname->cf_name.name,
> > >   .len = fname->cf_name.len};
> > > @@ -2171,9 +2171,6 @@ static int ext4_add_entry(handle_t *handle, struct 
> > > dentry *dentry,
> > > struct buffer_head *bh = NULL;
> > > struct ext4_dir_entry_2 *de;
> > > struct super_block *sb;
> > > -#ifdef CONFIG_UNICODE
> > > -   struct ext4_sb_info *sbi;
> > > -#endif
> > > struct ext4_filename fname;
> > > int retval;
> > > int dx_fallback=0;
> > > @@ -2190,9 +2187,8 @@ static int ext4_add_entry(handle_t *handle, struct 
> > > dentry *dentry,
> > > return -EINVAL;
> > >
> > >  #ifdef CONFIG_UNICODE
> > > -   sbi = EXT4_SB(sb);
> > > -   if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) &&
> > > -   sbi->s_encoding && utf8_validate(sbi->s_encoding, 
> > > >d_name))
> > > +   if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
> > > +   sb->s_encoding && utf8_validate(sb->s_encoding, >d_name))
> > > return -EINVAL;
> >
> > hm, just noticed the sb->s_encoding check here is superfluous, since the
> > has_strict_mode() cannot be true if !s_encoding.  Not related to this
> > patch though.
> >
> > Daniel, are you still working on getting this upstream?  The fscrypt
> > support would be very useful for us. :)
> >
> > In the hope this will get upstream, as its been flying for a while and
> > looks correct.
> >
> > Reviewed-by: Gabriel Krisman Bertazi 
>
> We couldn't get a response from Ted, so instead Jaegeuk has applied patches 
> 1-3
> to f2fs/dev for 5.10.  Hopefully Ted will take the ext4 patch for 5.11.
>
> I believe that Daniel is planning to resend the actual encryption+casefolding
> support soon, but initially only for f2fs since that will be ready first.
>
> - Eric

Yes, planning to send them shortly. Just checking that I've not missed
anything. For the sb_has_strict_encoding, I don't think that's
actually checking s_encoding, though it does check s_encoding_flags.
I'm planning to resend this one with that set, since it's not queued
in the f2fs tree.

-Daniel


[PATCH v12 4/4] ext4: Use generic casefolding support

2020-07-08 Thread Daniel Rosenberg
This switches ext4 over to the generic support provided in
the previous patch.

Since casefolded dentries behave the same in ext4 and f2fs, we decrease
the maintenance burden by unifying them, and any optimizations will
immediately apply to both.

Signed-off-by: Daniel Rosenberg 
Reviewed-by: Eric Biggers 
---
 fs/ext4/dir.c   | 64 ++---
 fs/ext4/ext4.h  | 12 --
 fs/ext4/hash.c  |  2 +-
 fs/ext4/namei.c | 20 +++-
 fs/ext4/super.c | 12 +-
 5 files changed, 17 insertions(+), 93 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d82336b1cd4..b437120f0b3f 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -669,68 +669,8 @@ const struct file_operations ext4_dir_operations = {
 };
 
 #ifdef CONFIG_UNICODE
-static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
-   struct qstr qstr = {.name = str, .len = len };
-   const struct dentry *parent = READ_ONCE(dentry->d_parent);
-   const struct inode *inode = READ_ONCE(parent->d_inode);
-   char strbuf[DNAME_INLINE_LEN];
-
-   if (!inode || !IS_CASEFOLDED(inode) ||
-   !EXT4_SB(inode->i_sb)->s_encoding) {
-   if (len != name->len)
-   return -1;
-   return memcmp(str, name->name, len);
-   }
-
-   /*
-* If the dentry name is stored in-line, then it may be concurrently
-* modified by a rename.  If this happens, the VFS will eventually retry
-* the lookup, so it doesn't matter what ->d_compare() returns.
-* However, it's unsafe to call utf8_strncasecmp() with an unstable
-* string.  Therefore, we have to copy the name into a temporary buffer.
-*/
-   if (len <= DNAME_INLINE_LEN - 1) {
-   memcpy(strbuf, str, len);
-   strbuf[len] = 0;
-   qstr.name = strbuf;
-   /* prevent compiler from optimizing out the temporary buffer */
-   barrier();
-   }
-
-   return ext4_ci_compare(inode, name, &qstr, false);
-}
-
-static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
-{
-   const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
-   const struct unicode_map *um = sbi->s_encoding;
-   const struct inode *inode = READ_ONCE(dentry->d_inode);
-   unsigned char *norm;
-   int len, ret = 0;
-
-   if (!inode || !IS_CASEFOLDED(inode) || !um)
-   return 0;
-
-   norm = kmalloc(PATH_MAX, GFP_ATOMIC);
-   if (!norm)
-   return -ENOMEM;
-
-   len = utf8_casefold(um, str, norm, PATH_MAX);
-   if (len < 0) {
-   if (ext4_has_strict_mode(sbi))
-   ret = -EINVAL;
-   goto out;
-   }
-   str->hash = full_name_hash(dentry, norm, len);
-out:
-   kfree(norm);
-   return ret;
-}
-
 const struct dentry_operations ext4_dentry_ops = {
-   .d_hash = ext4_d_hash,
-   .d_compare = ext4_d_compare,
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
 };
 #endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 42f5060f3cdf..5cd8be24a4fd 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1393,14 +1393,6 @@ struct ext4_super_block {
 
 #define EXT4_ENC_UTF8_12_1 1
 
-/*
- * Flags for ext4_sb_info.s_encoding_flags.
- */
-#define EXT4_ENC_STRICT_MODE_FL(1 << 0)
-
-#define ext4_has_strict_mode(sbi) \
-   (sbi->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL)
-
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -1450,10 +1442,6 @@ struct ext4_sb_info {
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
-#ifdef CONFIG_UNICODE
-   struct unicode_map *s_encoding;
-   __u16 s_encoding_flags;
-#endif
 
/* Journaling */
struct journal_s *s_journal;
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 3e133793a5a3..143b0073b3f4 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -275,7 +275,7 @@ int ext4fs_dirhash(const struct inode *dir, const char 
*name, int len,
   struct dx_hash_info *hinfo)
 {
 #ifdef CONFIG_UNICODE
-   const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;
+   const struct unicode_map *um = dir->i_sb->s_encoding;
int r, dlen;
unsigned char *buff;
struct qstr qstr = {.name = name, .len = len };
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 56738b538ddf..6ffd53e6455e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1286,8 +1286,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 
hash, ext4_lblk_t block)
 int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
const struct qstr *entry, bool quick)
 {
-   const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
-   c

[PATCH v12 2/4] fs: Add standard casefolding support

2020-07-08 Thread Daniel Rosenberg
This adds general supporting functions for filesystems that use
utf8 casefolding. It provides standard dentry_operations and adds the
necessary structures in struct super_block to allow this standardization.

The new dentry operations are functionally equivalent to the existing
operations in ext4 and f2fs, apart from the use of utf8_casefold_hash to
avoid an allocation.

By providing a common implementation, all users can benefit from any
optimizations without needing to port over improvements.

Signed-off-by: Daniel Rosenberg 
---
 fs/libfs.c | 87 ++
 include/linux/fs.h | 16 +
 2 files changed, 103 insertions(+)

diff --git a/fs/libfs.c b/fs/libfs.c
index 4d08edf19c78..72407cf151d4 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -20,6 +20,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 
@@ -1363,3 +1365,88 @@ bool is_empty_dir_inode(struct inode *inode)
return (inode->i_fop == _dir_operations) &&
(inode->i_op == _dir_inode_operations);
 }
+
+#ifdef CONFIG_UNICODE
+/*
+ * Determine if the name of a dentry should be casefolded.
+ *
+ * Return: if names will need casefolding
+ */
+static bool needs_casefold(const struct inode *dir)
+{
+   return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
+}
+
+/**
+ * generic_ci_d_compare - generic d_compare implementation for casefolding 
filesystems
+ * @dentry:dentry whose name we are checking against
+ * @len:   len of name of dentry
+ * @str:   str pointer to name of dentry
+ * @name:  Name to compare against
+ *
+ * Return: 0 if names match, 1 if mismatch, or -ERRNO
+ */
+int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name)
+{
+   const struct dentry *parent = READ_ONCE(dentry->d_parent);
+   const struct inode *dir = READ_ONCE(parent->d_inode);
+   const struct super_block *sb = dentry->d_sb;
+   const struct unicode_map *um = sb->s_encoding;
+   struct qstr qstr = QSTR_INIT(str, len);
+   char strbuf[DNAME_INLINE_LEN];
+   int ret;
+
+   if (!dir || !needs_casefold(dir))
+   goto fallback;
+   /*
+* If the dentry name is stored in-line, then it may be concurrently
+* modified by a rename.  If this happens, the VFS will eventually retry
+* the lookup, so it doesn't matter what ->d_compare() returns.
+* However, it's unsafe to call utf8_strncasecmp() with an unstable
+* string.  Therefore, we have to copy the name into a temporary buffer.
+*/
+   if (len <= DNAME_INLINE_LEN - 1) {
+   memcpy(strbuf, str, len);
+   strbuf[len] = 0;
+   qstr.name = strbuf;
+   /* prevent compiler from optimizing out the temporary buffer */
+   barrier();
+   }
+   ret = utf8_strncasecmp(um, name, &qstr);
+   if (ret >= 0)
+   return ret;
+
+   if (sb_has_strict_encoding(sb))
+   return -EINVAL;
+fallback:
+   if (len != name->len)
+   return 1;
+   return !!memcmp(str, name->name, len);
+}
+EXPORT_SYMBOL(generic_ci_d_compare);
+
+/**
+ * generic_ci_d_hash - generic d_hash implementation for casefolding 
filesystems
+ * @dentry:dentry of the parent directory
+ * @str:   qstr of name whose hash we should fill in
+ *
+ * Return: 0 if hash was successful or unchanged, and -EINVAL on error
+ */
+int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
+{
+   const struct inode *dir = READ_ONCE(dentry->d_inode);
+   struct super_block *sb = dentry->d_sb;
+   const struct unicode_map *um = sb->s_encoding;
+   int ret = 0;
+
+   if (!dir || !needs_casefold(dir))
+   return 0;
+
+   ret = utf8_casefold_hash(um, dentry, str);
+   if (ret < 0 && sb_has_strict_encoding(sb))
+   return -EINVAL;
+   return 0;
+}
+EXPORT_SYMBOL(generic_ci_d_hash);
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f881a892ea7..af8f2ecec8ff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1392,6 +1392,12 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_ACTIVE  (1<<30)
 #define SB_NOUSER  (1<<31)
 
+/* These flags relate to encoding and casefolding */
+#define SB_ENC_STRICT_MODE_FL  (1 << 0)
+
+#define sb_has_strict_encoding(sb) \
+   (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
+
 /*
  * Umount options
  */
@@ -1461,6 +1467,10 @@ struct super_block {
 #endif
 #ifdef CONFIG_FS_VERITY
const struct fsverity_operations *s_vop;
+#endif
+#ifdef CONFIG_UNICODE
+   struct unicode_map *s_encoding;
+   __u16 s_encoding_flags;
 #endif
struct hlist_bl_heads_roots;/* alternate root dentries for 
NFS */
struct list_h

[PATCH v12 3/4] f2fs: Use generic casefolding support

2020-07-08 Thread Daniel Rosenberg
This switches f2fs over to the generic support provided in
the previous patch.

Since casefolded dentries behave the same in ext4 and f2fs, we decrease
the maintenance burden by unifying them, and any optimizations will
immediately apply to both.

Signed-off-by: Daniel Rosenberg 
Reviewed-by: Eric Biggers 
---
 fs/f2fs/dir.c   | 84 +
 fs/f2fs/f2fs.h  |  4 --
 fs/f2fs/super.c | 10 ++---
 fs/f2fs/sysfs.c | 10 +++--
 include/linux/f2fs_fs.h |  3 --
 5 files changed, 20 insertions(+), 91 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index d35976785e8c..ff61f3a9c11d 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -75,21 +75,22 @@ int f2fs_init_casefolded_name(const struct inode *dir,
  struct f2fs_filename *fname)
 {
 #ifdef CONFIG_UNICODE
-   struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+   struct super_block *sb = dir->i_sb;
+   struct f2fs_sb_info *sbi = F2FS_SB(sb);
 
if (IS_CASEFOLDED(dir)) {
fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN,
   GFP_NOFS);
if (!fname->cf_name.name)
return -ENOMEM;
-   fname->cf_name.len = utf8_casefold(sbi->s_encoding,
+   fname->cf_name.len = utf8_casefold(sb->s_encoding,
   fname->usr_fname,
   fname->cf_name.name,
   F2FS_NAME_LEN);
if ((int)fname->cf_name.len <= 0) {
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
-   if (f2fs_has_strict_mode(sbi))
+   if (sb_has_strict_encoding(sb))
return -EINVAL;
/* fall back to treating name as opaque byte sequence */
}
@@ -215,8 +216,8 @@ static struct f2fs_dir_entry *find_in_block(struct inode 
*dir,
 static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr 
*name,
   const u8 *de_name, u32 de_name_len)
 {
-   const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
-   const struct unicode_map *um = sbi->s_encoding;
+   const struct super_block *sb = dir->i_sb;
+   const struct unicode_map *um = sb->s_encoding;
struct qstr entry = QSTR_INIT(de_name, de_name_len);
int res;
 
@@ -226,7 +227,7 @@ static bool f2fs_match_ci_name(const struct inode *dir, 
const struct qstr *name,
 * In strict mode, ignore invalid names.  In non-strict mode,
 * fall back to treating them as opaque byte sequences.
 */
-   if (f2fs_has_strict_mode(sbi) || name->len != entry.len)
+   if (sb_has_strict_encoding(sb) || name->len != entry.len)
return false;
return !memcmp(name->name, entry.name, name->len);
}
@@ -1107,75 +1108,8 @@ const struct file_operations f2fs_dir_operations = {
 };
 
 #ifdef CONFIG_UNICODE
-static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
-   const struct dentry *parent = READ_ONCE(dentry->d_parent);
-   const struct inode *dir = READ_ONCE(parent->d_inode);
-   const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
-   struct qstr entry = QSTR_INIT(str, len);
-   char strbuf[DNAME_INLINE_LEN];
-   int res;
-
-   if (!dir || !IS_CASEFOLDED(dir))
-   goto fallback;
-
-   /*
-* If the dentry name is stored in-line, then it may be concurrently
-* modified by a rename.  If this happens, the VFS will eventually retry
-* the lookup, so it doesn't matter what ->d_compare() returns.
-* However, it's unsafe to call utf8_strncasecmp() with an unstable
-* string.  Therefore, we have to copy the name into a temporary buffer.
-*/
-   if (len <= DNAME_INLINE_LEN - 1) {
-   memcpy(strbuf, str, len);
-   strbuf[len] = 0;
-   entry.name = strbuf;
-   /* prevent compiler from optimizing out the temporary buffer */
-   barrier();
-   }
-
-   res = utf8_strncasecmp(sbi->s_encoding, name, &entry);
-   if (res >= 0)
-   return res;
-
-   if (f2fs_has_strict_mode(sbi))
-   return -EINVAL;
-fallback:
-   if (len != name->len)
-   return 1;
-   return !!memcmp(str, name->name, len);
-}
-
-static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
-{
-   struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
-   const struct unicode_map *um = sbi->s_encoding;
-   const stru

[PATCH v12 1/4] unicode: Add utf8_casefold_hash

2020-07-08 Thread Daniel Rosenberg
This adds a case insensitive hash function to allow taking the hash
without needing to allocate a casefolded copy of the string.

The existing d_hash implementations for casefolding allocate memory
within rcu-walk, by avoiding it we can be more efficient and avoid
worrying about a failed allocation.

Signed-off-by: Daniel Rosenberg 
Reviewed-by: Gabriel Krisman Bertazi 
Reviewed-by: Eric Biggers 
---
 fs/unicode/utf8-core.c  | 23 ++-
 include/linux/unicode.h |  3 +++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 2a878b739115..dc25823bfed9 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "utf8n.h"
 
@@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const 
struct qstr *str,
}
return -EINVAL;
 }
-
 EXPORT_SYMBOL(utf8_casefold);
 
+int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
+  struct qstr *str)
+{
+   const struct utf8data *data = utf8nfdicf(um->version);
+   struct utf8cursor cur;
+   int c;
+   unsigned long hash = init_name_hash(salt);
+
+   if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+   return -EINVAL;
+
+   while ((c = utf8byte(&cur))) {
+   if (c < 0)
+   return -EINVAL;
+   hash = partial_name_hash((unsigned char)c, hash);
+   }
+   str->hash = end_name_hash(hash);
+   return 0;
+}
+EXPORT_SYMBOL(utf8_casefold_hash);
+
 int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
   unsigned char *dest, size_t dlen)
 {
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 990aa97d8049..74484d44c755 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct 
qstr *str,
 int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
  unsigned char *dest, size_t dlen);
 
+int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
+  struct qstr *str);
+
 struct unicode_map *utf8_load(const char *version);
 void utf8_unload(struct unicode_map *um);
 
-- 
2.27.0.383.g050319c2ae-goog



[PATCH v12 0/4] Prepare for upcoming Casefolding/Encryption patches

2020-07-08 Thread Daniel Rosenberg
This lays the groundwork for enabling casefolding and encryption at the
same time for ext4 and f2fs. A future set of patches will enable that
functionality.

These unify the highly similar dentry_operations that ext4 and f2fs both
use for casefolding. In addition, they improve d_hash by not requiring a
new string allocation.

Daniel Rosenberg (4):
  unicode: Add utf8_casefold_hash
  fs: Add standard casefolding support
  f2fs: Use generic casefolding support
  ext4: Use generic casefolding support

 fs/ext4/dir.c   | 64 +-
 fs/ext4/ext4.h  | 12 --
 fs/ext4/hash.c  |  2 +-
 fs/ext4/namei.c | 20 --
 fs/ext4/super.c | 12 +++---
 fs/f2fs/dir.c   | 84 +--
 fs/f2fs/f2fs.h  |  4 --
 fs/f2fs/super.c | 10 ++---
 fs/f2fs/sysfs.c | 10 +++--
 fs/libfs.c  | 87 +
 fs/unicode/utf8-core.c  | 23 ++-
 include/linux/f2fs_fs.h |  3 --
 include/linux/fs.h  | 16 
 include/linux/unicode.h |  3 ++
 14 files changed, 165 insertions(+), 185 deletions(-)

-- 
2.27.0.383.g050319c2ae-goog



Re: [PATCH v11 2/4] fs: Add standard casefolding support

2020-07-08 Thread Daniel Rosenberg
On Tue, Jul 7, 2020 at 9:12 PM Eric Biggers  wrote:
>
> On Tue, Jul 07, 2020 at 08:05:50PM -0700, Daniel Rosenberg wrote:
> > +/**
> > + * generic_ci_d_compare - generic d_compare implementation for casefolding 
> > filesystems
> > + * @dentry:  dentry whose name we are checking against
> > + * @len: len of name of dentry
> > + * @str: str pointer to name of dentry
> > + * @name:Name to compare against
> > + *
> > + * Return: 0 if names match, 1 if mismatch, or -ERRNO
> > + */
> > +int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
> > +   const char *str, const struct qstr *name)
> > +{
> > + const struct dentry *parent = READ_ONCE(dentry->d_parent);
> > + const struct inode *inode = READ_ONCE(parent->d_inode);
>
> How about calling the 'inode' variable 'dir' instead?
>
> That would help avoid confusion about what is the directory and what is a file
> in the directory.
>
> Likewise in generic_ci_d_hash().
>
> > +/**
> > + * generic_ci_d_hash - generic d_hash implementation for casefolding 
> > filesystems
> > + * @dentry:  dentry whose name we are hashing
>
> This comment for @dentry needs to be updated.
>
> It's the parent dentry, not the dentry whose name we are hashing.
>
> > + * @str: qstr of name whose hash we should fill in
> > + *
> > + * Return: 0 if hash was successful, or -ERRNO
>
> As I mentioned on v9, this can also return 0 if the hashing was not done 
> because
> it wants to fallback to the standard hashing.  Can you please fix the comment?
>
> > +int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
> > +{
> > + const struct inode *inode = READ_ONCE(dentry->d_inode);
> > + struct super_block *sb = dentry->d_sb;
> > + const struct unicode_map *um = sb->s_encoding;
> > + int ret = 0;
> > +
> > + if (!inode || !needs_casefold(inode))
> > + return 0;
> > +
> > + ret = utf8_casefold_hash(um, dentry, str);
> > + if (ret < 0)
> > + goto err;
> > +
> > + return 0;
> > +err:
> > + if (sb_has_strict_encoding(sb))
> > + ret = -EINVAL;
> > + else
> > + ret = 0;
> > + return ret;
> > +}
>
> On v9, Gabriel suggested simplifying this to:
>
> ret = utf8_casefold_hash(um, dentry, str);
> if (ret < 0 && sb_has_enc_strict_mode(sb))
> return -EINVAL;
> return 0;
>
> Any reason not to do that?
>
> - Eric

Guh, I remember making those changes, must've lost them in a rebase :(
I'll resend shortly.
-Daniel


[PATCH v11 1/4] unicode: Add utf8_casefold_hash

2020-07-07 Thread Daniel Rosenberg
This adds a case insensitive hash function to allow taking the hash
without needing to allocate a casefolded copy of the string.

The existing d_hash implementations for casefolding allocate memory
within rcu-walk, by avoiding it we can be more efficient and avoid
worrying about a failed allocation.

Signed-off-by: Daniel Rosenberg 
Reviewed-by: Gabriel Krisman Bertazi 
Reviewed-by: Eric Biggers 
---
 fs/unicode/utf8-core.c  | 23 ++-
 include/linux/unicode.h |  3 +++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 2a878b739115..dc25823bfed9 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "utf8n.h"
 
@@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const 
struct qstr *str,
}
return -EINVAL;
 }
-
 EXPORT_SYMBOL(utf8_casefold);
 
+int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
+  struct qstr *str)
+{
+   const struct utf8data *data = utf8nfdicf(um->version);
+   struct utf8cursor cur;
+   int c;
+   unsigned long hash = init_name_hash(salt);
+
+   if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+   return -EINVAL;
+
+   while ((c = utf8byte(&cur))) {
+   if (c < 0)
+   return -EINVAL;
+   hash = partial_name_hash((unsigned char)c, hash);
+   }
+   str->hash = end_name_hash(hash);
+   return 0;
+}
+EXPORT_SYMBOL(utf8_casefold_hash);
+
 int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
   unsigned char *dest, size_t dlen)
 {
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 990aa97d8049..74484d44c755 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct 
qstr *str,
 int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
  unsigned char *dest, size_t dlen);
 
+int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
+  struct qstr *str);
+
 struct unicode_map *utf8_load(const char *version);
 void utf8_unload(struct unicode_map *um);
 
-- 
2.27.0.383.g050319c2ae-goog



[PATCH v11 0/4] Prepare for upcoming Casefolding/Encryption patches

2020-07-07 Thread Daniel Rosenberg
This lays the groundwork for enabling casefolding and encryption at the
same time for ext4 and f2fs. A future set of patches will enable that
functionality.

These unify the highly similar dentry_operations that ext4 and f2fs both
use for casefolding. In addition, they improve d_hash by not requiring a
new string allocation.

Daniel Rosenberg (4):
  unicode: Add utf8_casefold_hash
  fs: Add standard casefolding support
  f2fs: Use generic casefolding support
  ext4: Use generic casefolding support

 fs/ext4/dir.c   | 64 +---
 fs/ext4/ext4.h  | 12 --
 fs/ext4/hash.c  |  2 +-
 fs/ext4/namei.c | 20 -
 fs/ext4/super.c | 12 +++---
 fs/f2fs/dir.c   | 84 
 fs/f2fs/f2fs.h  |  4 --
 fs/f2fs/super.c | 10 ++---
 fs/f2fs/sysfs.c | 10 +++--
 fs/libfs.c  | 94 +
 fs/unicode/utf8-core.c  | 23 +-
 include/linux/f2fs_fs.h |  3 --
 include/linux/fs.h  | 16 +++
 include/linux/unicode.h |  3 ++
 14 files changed, 172 insertions(+), 185 deletions(-)

-- 
2.27.0.383.g050319c2ae-goog



[PATCH v11 3/4] f2fs: Use generic casefolding support

2020-07-07 Thread Daniel Rosenberg
This switches f2fs over to the generic support provided in
the previous patch.

Since casefolded dentries behave the same in ext4 and f2fs, we decrease
the maintenance burden by unifying them, and any optimizations will
immediately apply to both.

Signed-off-by: Daniel Rosenberg 
Reviewed-by: Eric Biggers 
---
 fs/f2fs/dir.c   | 84 +
 fs/f2fs/f2fs.h  |  4 --
 fs/f2fs/super.c | 10 ++---
 fs/f2fs/sysfs.c | 10 +++--
 include/linux/f2fs_fs.h |  3 --
 5 files changed, 20 insertions(+), 91 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index d35976785e8c..ff61f3a9c11d 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -75,21 +75,22 @@ int f2fs_init_casefolded_name(const struct inode *dir,
  struct f2fs_filename *fname)
 {
 #ifdef CONFIG_UNICODE
-   struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+   struct super_block *sb = dir->i_sb;
+   struct f2fs_sb_info *sbi = F2FS_SB(sb);
 
if (IS_CASEFOLDED(dir)) {
fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN,
   GFP_NOFS);
if (!fname->cf_name.name)
return -ENOMEM;
-   fname->cf_name.len = utf8_casefold(sbi->s_encoding,
+   fname->cf_name.len = utf8_casefold(sb->s_encoding,
   fname->usr_fname,
   fname->cf_name.name,
   F2FS_NAME_LEN);
if ((int)fname->cf_name.len <= 0) {
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
-   if (f2fs_has_strict_mode(sbi))
+   if (sb_has_strict_encoding(sb))
return -EINVAL;
/* fall back to treating name as opaque byte sequence */
}
@@ -215,8 +216,8 @@ static struct f2fs_dir_entry *find_in_block(struct inode 
*dir,
 static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr 
*name,
   const u8 *de_name, u32 de_name_len)
 {
-   const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
-   const struct unicode_map *um = sbi->s_encoding;
+   const struct super_block *sb = dir->i_sb;
+   const struct unicode_map *um = sb->s_encoding;
struct qstr entry = QSTR_INIT(de_name, de_name_len);
int res;
 
@@ -226,7 +227,7 @@ static bool f2fs_match_ci_name(const struct inode *dir, 
const struct qstr *name,
 * In strict mode, ignore invalid names.  In non-strict mode,
 * fall back to treating them as opaque byte sequences.
 */
-   if (f2fs_has_strict_mode(sbi) || name->len != entry.len)
+   if (sb_has_strict_encoding(sb) || name->len != entry.len)
return false;
return !memcmp(name->name, entry.name, name->len);
}
@@ -1107,75 +1108,8 @@ const struct file_operations f2fs_dir_operations = {
 };
 
 #ifdef CONFIG_UNICODE
-static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
-   const struct dentry *parent = READ_ONCE(dentry->d_parent);
-   const struct inode *dir = READ_ONCE(parent->d_inode);
-   const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
-   struct qstr entry = QSTR_INIT(str, len);
-   char strbuf[DNAME_INLINE_LEN];
-   int res;
-
-   if (!dir || !IS_CASEFOLDED(dir))
-   goto fallback;
-
-   /*
-* If the dentry name is stored in-line, then it may be concurrently
-* modified by a rename.  If this happens, the VFS will eventually retry
-* the lookup, so it doesn't matter what ->d_compare() returns.
-* However, it's unsafe to call utf8_strncasecmp() with an unstable
-* string.  Therefore, we have to copy the name into a temporary buffer.
-*/
-   if (len <= DNAME_INLINE_LEN - 1) {
-   memcpy(strbuf, str, len);
-   strbuf[len] = 0;
-   entry.name = strbuf;
-   /* prevent compiler from optimizing out the temporary buffer */
-   barrier();
-   }
-
-   res = utf8_strncasecmp(sbi->s_encoding, name, &entry);
-   if (res >= 0)
-   return res;
-
-   if (f2fs_has_strict_mode(sbi))
-   return -EINVAL;
-fallback:
-   if (len != name->len)
-   return 1;
-   return !!memcmp(str, name->name, len);
-}
-
-static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
-{
-   struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
-   const struct unicode_map *um = sbi->s_encoding;
-   const stru

[PATCH v11 2/4] fs: Add standard casefolding support

2020-07-07 Thread Daniel Rosenberg
This adds general supporting functions for filesystems that use
utf8 casefolding. It provides standard dentry_operations and adds the
necessary structures in struct super_block to allow this standardization.

The new dentry operations are functionally equivalent to the existing
operations in ext4 and f2fs, apart from the use of utf8_casefold_hash to
avoid an allocation.

By providing a common implementation, all users can benefit from any
optimizations without needing to port over improvements.

Signed-off-by: Daniel Rosenberg 
---
 fs/libfs.c | 94 ++
 include/linux/fs.h | 16 
 2 files changed, 110 insertions(+)

diff --git a/fs/libfs.c b/fs/libfs.c
index 4d08edf19c78..fe22e2be6f7a 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -20,6 +20,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 
@@ -1363,3 +1365,95 @@ bool is_empty_dir_inode(struct inode *inode)
return (inode->i_fop == &empty_dir_operations) &&
(inode->i_op == &empty_dir_inode_operations);
 }
+
+#ifdef CONFIG_UNICODE
+/*
+ * Determine if the name of a dentry should be casefolded.
+ *
+ * Return: if names will need casefolding
+ */
+static bool needs_casefold(const struct inode *dir)
+{
+   return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding;
+}
+
+/**
+ * generic_ci_d_compare - generic d_compare implementation for casefolding 
filesystems
+ * @dentry:dentry whose name we are checking against
+ * @len:   len of name of dentry
+ * @str:   str pointer to name of dentry
+ * @name:  Name to compare against
+ *
+ * Return: 0 if names match, 1 if mismatch, or -ERRNO
+ */
+int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name)
+{
+   const struct dentry *parent = READ_ONCE(dentry->d_parent);
+   const struct inode *inode = READ_ONCE(parent->d_inode);
+   const struct super_block *sb = dentry->d_sb;
+   const struct unicode_map *um = sb->s_encoding;
+   struct qstr qstr = QSTR_INIT(str, len);
+   char strbuf[DNAME_INLINE_LEN];
+   int ret;
+
+   if (!inode || !needs_casefold(inode))
+   goto fallback;
+   /*
+* If the dentry name is stored in-line, then it may be concurrently
+* modified by a rename.  If this happens, the VFS will eventually retry
+* the lookup, so it doesn't matter what ->d_compare() returns.
+* However, it's unsafe to call utf8_strncasecmp() with an unstable
+* string.  Therefore, we have to copy the name into a temporary buffer.
+*/
+   if (len <= DNAME_INLINE_LEN - 1) {
+   memcpy(strbuf, str, len);
+   strbuf[len] = 0;
+   qstr.name = strbuf;
+   /* prevent compiler from optimizing out the temporary buffer */
+   barrier();
+   }
+   ret = utf8_strncasecmp(um, name, &qstr);
+   if (ret >= 0)
+   return ret;
+
+   if (sb_has_strict_encoding(sb))
+   return -EINVAL;
+fallback:
+   if (len != name->len)
+   return 1;
+   return !!memcmp(str, name->name, len);
+}
+EXPORT_SYMBOL(generic_ci_d_compare);
+
+/**
+ * generic_ci_d_hash - generic d_hash implementation for casefolding 
filesystems
+ * @dentry:dentry whose name we are hashing
+ * @str:   qstr of name whose hash we should fill in
+ *
+ * Return: 0 if hash was successful, or -ERRNO
+ */
+int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
+{
+   const struct inode *inode = READ_ONCE(dentry->d_inode);
+   struct super_block *sb = dentry->d_sb;
+   const struct unicode_map *um = sb->s_encoding;
+   int ret = 0;
+
+   if (!inode || !needs_casefold(inode))
+   return 0;
+
+   ret = utf8_casefold_hash(um, dentry, str);
+   if (ret < 0)
+   goto err;
+
+   return 0;
+err:
+   if (sb_has_strict_encoding(sb))
+   ret = -EINVAL;
+   else
+   ret = 0;
+   return ret;
+}
+EXPORT_SYMBOL(generic_ci_d_hash);
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f881a892ea7..af8f2ecec8ff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1392,6 +1392,12 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_ACTIVE  (1<<30)
 #define SB_NOUSER  (1<<31)
 
+/* These flags relate to encoding and casefolding */
+#define SB_ENC_STRICT_MODE_FL  (1 << 0)
+
+#define sb_has_strict_encoding(sb) \
+   (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
+
 /*
  * Umount options
  */
@@ -1461,6 +1467,10 @@ struct super_block {
 #endif
 #ifdef CONFIG_FS_VERITY
const struct fsverity_operations *s_vop;
+#endif
+#ifdef CONFIG_UNICODE
+   struct unicode_map *s_encoding;
+   __u16 s_encoding_flags;
 #endif
struct hlist_bl_heads_r

[PATCH v11 4/4] ext4: Use generic casefolding support

2020-07-07 Thread Daniel Rosenberg
This switches ext4 over to the generic support provided in
the previous patch.

Since casefolded dentries behave the same in ext4 and f2fs, we decrease
the maintenance burden by unifying them, and any optimizations will
immediately apply to both.

Signed-off-by: Daniel Rosenberg 
Reviewed-by: Eric Biggers 
---
 fs/ext4/dir.c   | 64 ++---
 fs/ext4/ext4.h  | 12 --
 fs/ext4/hash.c  |  2 +-
 fs/ext4/namei.c | 20 +++-
 fs/ext4/super.c | 12 +-
 5 files changed, 17 insertions(+), 93 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d82336b1cd4..b437120f0b3f 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -669,68 +669,8 @@ const struct file_operations ext4_dir_operations = {
 };
 
 #ifdef CONFIG_UNICODE
-static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
-   struct qstr qstr = {.name = str, .len = len };
-   const struct dentry *parent = READ_ONCE(dentry->d_parent);
-   const struct inode *inode = READ_ONCE(parent->d_inode);
-   char strbuf[DNAME_INLINE_LEN];
-
-   if (!inode || !IS_CASEFOLDED(inode) ||
-   !EXT4_SB(inode->i_sb)->s_encoding) {
-   if (len != name->len)
-   return -1;
-   return memcmp(str, name->name, len);
-   }
-
-   /*
-* If the dentry name is stored in-line, then it may be concurrently
-* modified by a rename.  If this happens, the VFS will eventually retry
-* the lookup, so it doesn't matter what ->d_compare() returns.
-* However, it's unsafe to call utf8_strncasecmp() with an unstable
-* string.  Therefore, we have to copy the name into a temporary buffer.
-*/
-   if (len <= DNAME_INLINE_LEN - 1) {
-   memcpy(strbuf, str, len);
-   strbuf[len] = 0;
-   qstr.name = strbuf;
-   /* prevent compiler from optimizing out the temporary buffer */
-   barrier();
-   }
-
-   return ext4_ci_compare(inode, name, &qstr, false);
-}
-
-static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
-{
-   const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
-   const struct unicode_map *um = sbi->s_encoding;
-   const struct inode *inode = READ_ONCE(dentry->d_inode);
-   unsigned char *norm;
-   int len, ret = 0;
-
-   if (!inode || !IS_CASEFOLDED(inode) || !um)
-   return 0;
-
-   norm = kmalloc(PATH_MAX, GFP_ATOMIC);
-   if (!norm)
-   return -ENOMEM;
-
-   len = utf8_casefold(um, str, norm, PATH_MAX);
-   if (len < 0) {
-   if (ext4_has_strict_mode(sbi))
-   ret = -EINVAL;
-   goto out;
-   }
-   str->hash = full_name_hash(dentry, norm, len);
-out:
-   kfree(norm);
-   return ret;
-}
-
 const struct dentry_operations ext4_dentry_ops = {
-   .d_hash = ext4_d_hash,
-   .d_compare = ext4_d_compare,
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
 };
 #endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 42f5060f3cdf..5cd8be24a4fd 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1393,14 +1393,6 @@ struct ext4_super_block {
 
 #define EXT4_ENC_UTF8_12_1 1
 
-/*
- * Flags for ext4_sb_info.s_encoding_flags.
- */
-#define EXT4_ENC_STRICT_MODE_FL(1 << 0)
-
-#define ext4_has_strict_mode(sbi) \
-   (sbi->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL)
-
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -1450,10 +1442,6 @@ struct ext4_sb_info {
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
-#ifdef CONFIG_UNICODE
-   struct unicode_map *s_encoding;
-   __u16 s_encoding_flags;
-#endif
 
/* Journaling */
struct journal_s *s_journal;
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 3e133793a5a3..143b0073b3f4 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -275,7 +275,7 @@ int ext4fs_dirhash(const struct inode *dir, const char 
*name, int len,
   struct dx_hash_info *hinfo)
 {
 #ifdef CONFIG_UNICODE
-   const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;
+   const struct unicode_map *um = dir->i_sb->s_encoding;
int r, dlen;
unsigned char *buff;
struct qstr qstr = {.name = name, .len = len };
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 56738b538ddf..6ffd53e6455e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1286,8 +1286,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 
hash, ext4_lblk_t block)
 int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
const struct qstr *entry, bool quick)
 {
-   const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
-   c

[PATCH v10 0/4] Prepare for upcoming Casefolding/Encryption patches

2020-07-07 Thread Daniel Rosenberg
This lays the ground work for enabling casefolding and encryption at the
same time for ext4 and f2fs. A future set of patches will enable that
functionality.

These unify the highly similar dentry_operations that ext4 and f2fs both
use for casefolding. In addition, they improve d_hash by not requiring a
new string allocation, and ensure we don't attempt to casefold the no-key
token of an encrypted filename.

Daniel Rosenberg (4):
  unicode: Add utf8_casefold_hash
  fs: Add standard casefolding support
  f2fs: Use generic casefolding support
  ext4: Use generic casefolding support

 fs/ext4/dir.c   | 64 +--
 fs/ext4/ext4.h  | 12 --
 fs/ext4/hash.c  |  2 +-
 fs/ext4/namei.c | 20 -
 fs/ext4/super.c | 12 +++---
 fs/f2fs/dir.c   | 83 ---
 fs/f2fs/f2fs.h  |  4 --
 fs/f2fs/super.c | 10 ++---
 fs/f2fs/sysfs.c | 10 +++--
 fs/libfs.c  | 96 +
 fs/unicode/utf8-core.c  | 23 +-
 include/linux/f2fs_fs.h |  3 --
 include/linux/fs.h  | 16 +++
 include/linux/unicode.h |  3 ++
 14 files changed, 174 insertions(+), 184 deletions(-)

-- 
2.27.0.212.ge8ba1cc988-goog



[PATCH v10 1/4] unicode: Add utf8_casefold_hash

2020-07-07 Thread Daniel Rosenberg
This adds a case insensitive hash function to allow taking the hash
without needing to allocate a casefolded copy of the string.

The existing d_hash implementations for casefolding allocate memory
within rcu-walk; by avoiding that, we can be more efficient and avoid
worrying about a failed allocation.

Signed-off-by: Daniel Rosenberg 
---
 fs/unicode/utf8-core.c  | 23 ++-
 include/linux/unicode.h |  3 +++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 2a878b739115..dc25823bfed9 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "utf8n.h"
 
@@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const 
struct qstr *str,
}
return -EINVAL;
 }
-
 EXPORT_SYMBOL(utf8_casefold);
 
+int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
+  struct qstr *str)
+{
+   const struct utf8data *data = utf8nfdicf(um->version);
+   struct utf8cursor cur;
+   int c;
+   unsigned long hash = init_name_hash(salt);
+
+   if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+   return -EINVAL;
+
+   while ((c = utf8byte(&cur))) {
+   if (c < 0)
+   return -EINVAL;
+   hash = partial_name_hash((unsigned char)c, hash);
+   }
+   str->hash = end_name_hash(hash);
+   return 0;
+}
+EXPORT_SYMBOL(utf8_casefold_hash);
+
 int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
   unsigned char *dest, size_t dlen)
 {
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 990aa97d8049..74484d44c755 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct 
qstr *str,
 int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
  unsigned char *dest, size_t dlen);
 
+int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
+  struct qstr *str);
+
 struct unicode_map *utf8_load(const char *version);
 void utf8_unload(struct unicode_map *um);
 
-- 
2.27.0.212.ge8ba1cc988-goog



[PATCH v10 2/4] fs: Add standard casefolding support

2020-07-07 Thread Daniel Rosenberg
This adds general supporting functions for filesystems that use
utf8 casefolding. It provides standard dentry_operations and adds the
necessary structures in struct super_block to allow this standardization.

The new dentry operations are functionally equivalent to the existing
operations in ext4 and f2fs, apart from the use of utf8_casefold_hash to
avoid an allocation, and dealing with casefolding and encryption. It
doesn't make sense to casefold the no-key token for the encrypted name,
so we don't casefold in that case.

By providing a common implementation, all users can benefit from any
optimizations without needing to port over improvements.

Signed-off-by: Daniel Rosenberg 
---
 fs/libfs.c | 96 ++
 include/linux/fs.h | 16 
 2 files changed, 112 insertions(+)

diff --git a/fs/libfs.c b/fs/libfs.c
index 4d08edf19c78..f7df13806f36 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -20,6 +20,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 
@@ -1363,3 +1365,97 @@ bool is_empty_dir_inode(struct inode *inode)
return (inode->i_fop == &empty_dir_operations) &&
(inode->i_op == &empty_dir_inode_operations);
 }
+
+#ifdef CONFIG_UNICODE
+/*
+ * Determine if the name of a dentry should be casefolded. It does not make
+ * sense to casefold the no-key token of an encrypted filename.
+ *
+ * Return: if names will need casefolding
+ */
+static bool needs_casefold(const struct inode *dir, const struct dentry 
*dentry)
+{
+   return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding &&
+   !(dentry->d_flags & DCACHE_ENCRYPTED_NAME);
+}
+
+/**
+ * generic_ci_d_compare - generic d_compare implementation for casefolding 
filesystems
+ * @dentry:dentry whose name we are checking against
+ * @len:   len of name of dentry
+ * @str:   str pointer to name of dentry
+ * @name:  Name to compare against
+ *
+ * Return: 0 if names match, 1 if mismatch, or -ERRNO
+ */
+int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name)
+{
+   const struct dentry *parent = READ_ONCE(dentry->d_parent);
+   const struct inode *inode = READ_ONCE(parent->d_inode);
+   const struct super_block *sb = dentry->d_sb;
+   const struct unicode_map *um = sb->s_encoding;
+   struct qstr qstr = QSTR_INIT(str, len);
+   char strbuf[DNAME_INLINE_LEN];
+   int ret;
+
+   if (!inode || !needs_casefold(inode, dentry))
+   goto fallback;
+   /*
+* If the dentry name is stored in-line, then it may be concurrently
+* modified by a rename.  If this happens, the VFS will eventually retry
+* the lookup, so it doesn't matter what ->d_compare() returns.
+* However, it's unsafe to call utf8_strncasecmp() with an unstable
+* string.  Therefore, we have to copy the name into a temporary buffer.
+*/
+   if (len <= DNAME_INLINE_LEN - 1) {
+   memcpy(strbuf, str, len);
+   strbuf[len] = 0;
+   qstr.name = strbuf;
+   /* prevent compiler from optimizing out the temporary buffer */
+   barrier();
+   }
+   ret = utf8_strncasecmp(um, name, &qstr);
+   if (ret >= 0)
+   return ret;
+
+   if (sb_has_strict_encoding(sb))
+   return -EINVAL;
+fallback:
+   if (len != name->len)
+   return 1;
+   return !!memcmp(str, name->name, len);
+}
+EXPORT_SYMBOL(generic_ci_d_compare);
+
+/**
+ * generic_ci_d_hash - generic d_hash implementation for casefolding 
filesystems
+ * @dentry:dentry whose name we are hashing
+ * @str:   qstr of name whose hash we should fill in
+ *
+ * Return: 0 if hash was successful, or -ERRNO
+ */
+int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
+{
+   const struct inode *inode = READ_ONCE(dentry->d_inode);
+   struct super_block *sb = dentry->d_sb;
+   const struct unicode_map *um = sb->s_encoding;
+   int ret = 0;
+
+   if (!inode || !needs_casefold(inode, dentry))
+   return 0;
+
+   ret = utf8_casefold_hash(um, dentry, str);
+   if (ret < 0)
+   goto err;
+
+   return 0;
+err:
+   if (sb_has_strict_encoding(sb))
+   ret = -EINVAL;
+   else
+   ret = 0;
+   return ret;
+}
+EXPORT_SYMBOL(generic_ci_d_hash);
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f881a892ea7..af8f2ecec8ff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1392,6 +1392,12 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_ACTIVE  (1<<30)
 #define SB_NOUSER  (1<<31)
 
+/* These flags relate to encoding and casefolding */
+#define SB_ENC_STRICT_MODE_FL  (1 << 0)
+
+#define sb_has_strict_encoding(sb) \
+   

[PATCH v10 3/4] f2fs: Use generic casefolding support

2020-07-07 Thread Daniel Rosenberg
This switches f2fs over to the generic support provided in
the previous patch.

Since casefolded dentries behave the same in ext4 and f2fs, we decrease
the maintenance burden by unifying them, and any optimizations will
immediately apply to both.

Signed-off-by: Daniel Rosenberg 
---
 fs/f2fs/dir.c   | 83 +
 fs/f2fs/f2fs.h  |  4 --
 fs/f2fs/super.c | 10 ++---
 fs/f2fs/sysfs.c | 10 +++--
 include/linux/f2fs_fs.h |  3 --
 5 files changed, 20 insertions(+), 90 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index d35976785e8c..18bc97b2c72c 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -75,21 +75,22 @@ int f2fs_init_casefolded_name(const struct inode *dir,
  struct f2fs_filename *fname)
 {
 #ifdef CONFIG_UNICODE
-   struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+   struct super_block *sb = dir->i_sb;
+   struct f2fs_sb_info *sbi = F2FS_SB(sb);
 
if (IS_CASEFOLDED(dir)) {
fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN,
   GFP_NOFS);
if (!fname->cf_name.name)
return -ENOMEM;
-   fname->cf_name.len = utf8_casefold(sbi->s_encoding,
+   fname->cf_name.len = utf8_casefold(sb->s_encoding,
   fname->usr_fname,
   fname->cf_name.name,
   F2FS_NAME_LEN);
if ((int)fname->cf_name.len <= 0) {
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
-   if (f2fs_has_strict_mode(sbi))
+   if (sb_has_strict_encoding(sb))
return -EINVAL;
/* fall back to treating name as opaque byte sequence */
}
@@ -215,8 +216,8 @@ static struct f2fs_dir_entry *find_in_block(struct inode 
*dir,
 static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr 
*name,
   const u8 *de_name, u32 de_name_len)
 {
-   const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
-   const struct unicode_map *um = sbi->s_encoding;
+   const struct super_block *sb = dir->i_sb;
+   const struct unicode_map *um = sb->s_encoding;
struct qstr entry = QSTR_INIT(de_name, de_name_len);
int res;
 
@@ -226,7 +227,7 @@ static bool f2fs_match_ci_name(const struct inode *dir, 
const struct qstr *name,
 * In strict mode, ignore invalid names.  In non-strict mode,
 * fall back to treating them as opaque byte sequences.
 */
-   if (f2fs_has_strict_mode(sbi) || name->len != entry.len)
+   if (sb_has_strict_encoding(sb) || name->len != entry.len)
return false;
return !memcmp(name->name, entry.name, name->len);
}
@@ -1107,75 +1108,9 @@ const struct file_operations f2fs_dir_operations = {
 };
 
 #ifdef CONFIG_UNICODE
-static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
-   const struct dentry *parent = READ_ONCE(dentry->d_parent);
-   const struct inode *dir = READ_ONCE(parent->d_inode);
-   const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
-   struct qstr entry = QSTR_INIT(str, len);
-   char strbuf[DNAME_INLINE_LEN];
-   int res;
-
-   if (!dir || !IS_CASEFOLDED(dir))
-   goto fallback;
-
-   /*
-* If the dentry name is stored in-line, then it may be concurrently
-* modified by a rename.  If this happens, the VFS will eventually retry
-* the lookup, so it doesn't matter what ->d_compare() returns.
-* However, it's unsafe to call utf8_strncasecmp() with an unstable
-* string.  Therefore, we have to copy the name into a temporary buffer.
-*/
-   if (len <= DNAME_INLINE_LEN - 1) {
-   memcpy(strbuf, str, len);
-   strbuf[len] = 0;
-   entry.name = strbuf;
-   /* prevent compiler from optimizing out the temporary buffer */
-   barrier();
-   }
-
-   res = utf8_strncasecmp(sbi->s_encoding, name, &entry);
-   if (res >= 0)
-   return res;
-
-   if (f2fs_has_strict_mode(sbi))
-   return -EINVAL;
-fallback:
-   if (len != name->len)
-   return 1;
-   return !!memcmp(str, name->name, len);
-}
-
-static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
-{
-   struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
-   const struct unicode_map *um = sbi->s_encoding;
-   const struct inode *inode = READ

[PATCH v10 4/4] ext4: Use generic casefolding support

2020-07-07 Thread Daniel Rosenberg
This switches ext4 over to the generic support provided in
the previous patch.

Since casefolded dentries behave the same in ext4 and f2fs, we decrease
the maintenance burden by unifying them, and any optimizations will
immediately apply to both.

Signed-off-by: Daniel Rosenberg 
---
 fs/ext4/dir.c   | 64 ++---
 fs/ext4/ext4.h  | 12 --
 fs/ext4/hash.c  |  2 +-
 fs/ext4/namei.c | 20 +++-
 fs/ext4/super.c | 12 +-
 5 files changed, 17 insertions(+), 93 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d82336b1cd4..b437120f0b3f 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -669,68 +669,8 @@ const struct file_operations ext4_dir_operations = {
 };
 
 #ifdef CONFIG_UNICODE
-static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
-   struct qstr qstr = {.name = str, .len = len };
-   const struct dentry *parent = READ_ONCE(dentry->d_parent);
-   const struct inode *inode = READ_ONCE(parent->d_inode);
-   char strbuf[DNAME_INLINE_LEN];
-
-   if (!inode || !IS_CASEFOLDED(inode) ||
-   !EXT4_SB(inode->i_sb)->s_encoding) {
-   if (len != name->len)
-   return -1;
-   return memcmp(str, name->name, len);
-   }
-
-   /*
-* If the dentry name is stored in-line, then it may be concurrently
-* modified by a rename.  If this happens, the VFS will eventually retry
-* the lookup, so it doesn't matter what ->d_compare() returns.
-* However, it's unsafe to call utf8_strncasecmp() with an unstable
-* string.  Therefore, we have to copy the name into a temporary buffer.
-*/
-   if (len <= DNAME_INLINE_LEN - 1) {
-   memcpy(strbuf, str, len);
-   strbuf[len] = 0;
-   qstr.name = strbuf;
-   /* prevent compiler from optimizing out the temporary buffer */
-   barrier();
-   }
-
-   return ext4_ci_compare(inode, name, &qstr, false);
-}
-
-static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
-{
-   const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
-   const struct unicode_map *um = sbi->s_encoding;
-   const struct inode *inode = READ_ONCE(dentry->d_inode);
-   unsigned char *norm;
-   int len, ret = 0;
-
-   if (!inode || !IS_CASEFOLDED(inode) || !um)
-   return 0;
-
-   norm = kmalloc(PATH_MAX, GFP_ATOMIC);
-   if (!norm)
-   return -ENOMEM;
-
-   len = utf8_casefold(um, str, norm, PATH_MAX);
-   if (len < 0) {
-   if (ext4_has_strict_mode(sbi))
-   ret = -EINVAL;
-   goto out;
-   }
-   str->hash = full_name_hash(dentry, norm, len);
-out:
-   kfree(norm);
-   return ret;
-}
-
 const struct dentry_operations ext4_dentry_ops = {
-   .d_hash = ext4_d_hash,
-   .d_compare = ext4_d_compare,
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
 };
 #endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 42f5060f3cdf..5cd8be24a4fd 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1393,14 +1393,6 @@ struct ext4_super_block {
 
 #define EXT4_ENC_UTF8_12_1 1
 
-/*
- * Flags for ext4_sb_info.s_encoding_flags.
- */
-#define EXT4_ENC_STRICT_MODE_FL(1 << 0)
-
-#define ext4_has_strict_mode(sbi) \
-   (sbi->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL)
-
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -1450,10 +1442,6 @@ struct ext4_sb_info {
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
-#ifdef CONFIG_UNICODE
-   struct unicode_map *s_encoding;
-   __u16 s_encoding_flags;
-#endif
 
/* Journaling */
struct journal_s *s_journal;
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 3e133793a5a3..143b0073b3f4 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -275,7 +275,7 @@ int ext4fs_dirhash(const struct inode *dir, const char 
*name, int len,
   struct dx_hash_info *hinfo)
 {
 #ifdef CONFIG_UNICODE
-   const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;
+   const struct unicode_map *um = dir->i_sb->s_encoding;
int r, dlen;
unsigned char *buff;
struct qstr qstr = {.name = name, .len = len };
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 56738b538ddf..6ffd53e6455e 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1286,8 +1286,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 
hash, ext4_lblk_t block)
 int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
const struct qstr *entry, bool quick)
 {
-   const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
-   const struct unicode_map *um = s

Re: [PATCH v9 4/4] ext4: Use generic casefolding support

2020-07-07 Thread Daniel Rosenberg
On Tue, Jun 23, 2020 at 10:43 PM Gabriel Krisman Bertazi
 wrote:
>
> Daniel Rosenberg  writes:
>
> > -
> >  const struct dentry_operations ext4_dentry_ops = {
> > - .d_hash = ext4_d_hash,
> > - .d_compare = ext4_d_compare,
> > + .d_hash = generic_ci_d_hash,
> > + .d_compare = generic_ci_d_compare,
> >  };
> >  #endif
>
> Can you make the structure generic since it is the same for f2fs and
> ext4, which let you drop the code guards?  Unless that becomes a problem for
> d_revalidate with fscrypt, it is fine like this.
>
> --
> Gabriel Krisman Bertazi

I unify them in a later patch, since I end up having to deal with
fscrypt's d_revalidate. With that patch I'd end up undoing the export
I'd add for this, so I'll skip that for the moment.

-Daniel


Re: [PATCH v9 2/4] fs: Add standard casefolding support

2020-07-02 Thread Daniel Rosenberg
On Tue, Jun 23, 2020 at 10:57 PM Eric Biggers  wrote:
>
> Note that the '!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir)' check 
> can
> be racy, because a process can be looking up a no-key token in a directory 
> while
> concurrently another process initializes the directory's ->i_crypt_info, 
> causing
> fscrypt_has_encryption_key(dir) to suddenly start returning true.
>
> In my rework of filename handling in f2fs, I actually ended up removing all
> calls to needs_casefold(), thus avoiding this race.  f2fs now decides whether
> the name is going to need casefolding early on, in __f2fs_setup_filename(),
> where it knows in a race-free way whether the filename is a no-key token or 
> not.
>
> Perhaps ext4 should work the same way?  It did look like there would be some
> extra complexity due to how the ext4 directory hashing works in comparison to
> f2fs's, but I haven't had a chance to properly investigate it.
>
> - Eric

Hm. I think I should be able to just check for DCACHE_ENCRYPTED_NAME
in the dentry here, right? I'm just trying to avoid casefolding the
no-key token, and that flag should indicate that.
I'll see if I can rework the ext4 patches to not need needs_casefold
as well, since then there'd be no need to export it.
-Daniel


[PATCH v9 1/4] unicode: Add utf8_casefold_hash

2020-06-23 Thread Daniel Rosenberg
This adds a case insensitive hash function to allow taking the hash
without needing to allocate a casefolded copy of the string.

Signed-off-by: Daniel Rosenberg 
---
 fs/unicode/utf8-core.c  | 23 ++-
 include/linux/unicode.h |  3 +++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/fs/unicode/utf8-core.c b/fs/unicode/utf8-core.c
index 2a878b739115d..90656b9980720 100644
--- a/fs/unicode/utf8-core.c
+++ b/fs/unicode/utf8-core.c
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "utf8n.h"
 
@@ -122,9 +123,29 @@ int utf8_casefold(const struct unicode_map *um, const 
struct qstr *str,
}
return -EINVAL;
 }
-
 EXPORT_SYMBOL(utf8_casefold);
 
+int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
+  struct qstr *str)
+{
+   const struct utf8data *data = utf8nfdicf(um->version);
+   struct utf8cursor cur;
+   int c;
+   unsigned long hash = init_name_hash(salt);
+
+   if (utf8ncursor(&cur, data, str->name, str->len) < 0)
+   return -EINVAL;
+
+   while ((c = utf8byte(&cur))) {
+   if (c < 0)
+   return c;
+   hash = partial_name_hash((unsigned char)c, hash);
+   }
+   str->hash = end_name_hash(hash);
+   return 0;
+}
+EXPORT_SYMBOL(utf8_casefold_hash);
+
 int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
   unsigned char *dest, size_t dlen)
 {
diff --git a/include/linux/unicode.h b/include/linux/unicode.h
index 990aa97d80496..74484d44c7554 100644
--- a/include/linux/unicode.h
+++ b/include/linux/unicode.h
@@ -27,6 +27,9 @@ int utf8_normalize(const struct unicode_map *um, const struct 
qstr *str,
 int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
  unsigned char *dest, size_t dlen);
 
+int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
+  struct qstr *str);
+
 struct unicode_map *utf8_load(const char *version);
 void utf8_unload(struct unicode_map *um);
 
-- 
2.27.0.111.gc72c7da667-goog



[PATCH v9 3/4] f2fs: Use generic casefolding support

2020-06-23 Thread Daniel Rosenberg
This switches f2fs over to the generic support provided in
commit 5f829feca774 ("fs: Add standard casefolding support")

Signed-off-by: Daniel Rosenberg 
---
 fs/f2fs/dir.c   | 84 +
 fs/f2fs/f2fs.h  |  4 --
 fs/f2fs/super.c | 10 ++---
 fs/f2fs/sysfs.c | 10 +++--
 include/linux/f2fs_fs.h |  3 --
 5 files changed, 21 insertions(+), 90 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index d35976785e8c5..b59c2673daa09 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -75,21 +75,22 @@ int f2fs_init_casefolded_name(const struct inode *dir,
  struct f2fs_filename *fname)
 {
 #ifdef CONFIG_UNICODE
-   struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+   struct super_block *sb = dir->i_sb;
+   struct f2fs_sb_info *sbi = F2FS_SB(sb);
 
if (IS_CASEFOLDED(dir)) {
fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN,
   GFP_NOFS);
if (!fname->cf_name.name)
return -ENOMEM;
-   fname->cf_name.len = utf8_casefold(sbi->s_encoding,
+   fname->cf_name.len = utf8_casefold(sb->s_encoding,
   fname->usr_fname,
   fname->cf_name.name,
   F2FS_NAME_LEN);
if ((int)fname->cf_name.len <= 0) {
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
-   if (f2fs_has_strict_mode(sbi))
+   if (sb_has_enc_strict_mode(sb))
return -EINVAL;
/* fall back to treating name as opaque byte sequence */
}
@@ -215,8 +216,9 @@ static struct f2fs_dir_entry *find_in_block(struct inode 
*dir,
 static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr 
*name,
   const u8 *de_name, u32 de_name_len)
 {
-   const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
-   const struct unicode_map *um = sbi->s_encoding;
+   const struct super_block *sb = dir->i_sb;
+   const struct f2fs_sb_info *sbi = F2FS_SB(sb);
+   const struct unicode_map *um = sb->s_encoding;
struct qstr entry = QSTR_INIT(de_name, de_name_len);
int res;
 
@@ -226,7 +228,7 @@ static bool f2fs_match_ci_name(const struct inode *dir, 
const struct qstr *name,
 * In strict mode, ignore invalid names.  In non-strict mode,
 * fall back to treating them as opaque byte sequences.
 */
-   if (f2fs_has_strict_mode(sbi) || name->len != entry.len)
+   if (sb_has_enc_strict_mode(sb) || name->len != entry.len)
return false;
return !memcmp(name->name, entry.name, name->len);
}
@@ -1107,75 +1109,9 @@ const struct file_operations f2fs_dir_operations = {
 };
 
 #ifdef CONFIG_UNICODE
-static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
-   const struct dentry *parent = READ_ONCE(dentry->d_parent);
-   const struct inode *dir = READ_ONCE(parent->d_inode);
-   const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
-   struct qstr entry = QSTR_INIT(str, len);
-   char strbuf[DNAME_INLINE_LEN];
-   int res;
-
-   if (!dir || !IS_CASEFOLDED(dir))
-   goto fallback;
-
-   /*
-* If the dentry name is stored in-line, then it may be concurrently
-* modified by a rename.  If this happens, the VFS will eventually retry
-* the lookup, so it doesn't matter what ->d_compare() returns.
-* However, it's unsafe to call utf8_strncasecmp() with an unstable
-* string.  Therefore, we have to copy the name into a temporary buffer.
-*/
-   if (len <= DNAME_INLINE_LEN - 1) {
-   memcpy(strbuf, str, len);
-   strbuf[len] = 0;
-   entry.name = strbuf;
-   /* prevent compiler from optimizing out the temporary buffer */
-   barrier();
-   }
-
-   res = utf8_strncasecmp(sbi->s_encoding, name, );
-   if (res >= 0)
-   return res;
-
-   if (f2fs_has_strict_mode(sbi))
-   return -EINVAL;
-fallback:
-   if (len != name->len)
-   return 1;
-   return !!memcmp(str, name->name, len);
-}
-
-static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
-{
-   struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
-   const struct unicode_map *um = sbi->s_encoding;
-   const struct inode *inode = READ_ONCE(dentry->d_inode);
-   unsigned char *norm;
-

[PATCH v9 4/4] ext4: Use generic casefolding support

2020-06-23 Thread Daniel Rosenberg
This switches ext4 over to the generic support provided in
commit 5f829feca774 ("fs: Add standard casefolding support")

Signed-off-by: Daniel Rosenberg 
---
 fs/ext4/dir.c   | 64 ++---
 fs/ext4/ext4.h  | 12 --
 fs/ext4/hash.c  |  2 +-
 fs/ext4/namei.c | 20 +++-
 fs/ext4/super.c | 12 +-
 5 files changed, 17 insertions(+), 93 deletions(-)

diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d82336b1cd45..b437120f0b3f5 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -669,68 +669,8 @@ const struct file_operations ext4_dir_operations = {
 };
 
 #ifdef CONFIG_UNICODE
-static int ext4_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
-   struct qstr qstr = {.name = str, .len = len };
-   const struct dentry *parent = READ_ONCE(dentry->d_parent);
-   const struct inode *inode = READ_ONCE(parent->d_inode);
-   char strbuf[DNAME_INLINE_LEN];
-
-   if (!inode || !IS_CASEFOLDED(inode) ||
-   !EXT4_SB(inode->i_sb)->s_encoding) {
-   if (len != name->len)
-   return -1;
-   return memcmp(str, name->name, len);
-   }
-
-   /*
-* If the dentry name is stored in-line, then it may be concurrently
-* modified by a rename.  If this happens, the VFS will eventually retry
-* the lookup, so it doesn't matter what ->d_compare() returns.
-* However, it's unsafe to call utf8_strncasecmp() with an unstable
-* string.  Therefore, we have to copy the name into a temporary buffer.
-*/
-   if (len <= DNAME_INLINE_LEN - 1) {
-   memcpy(strbuf, str, len);
-   strbuf[len] = 0;
-   qstr.name = strbuf;
-   /* prevent compiler from optimizing out the temporary buffer */
-   barrier();
-   }
-
-   return ext4_ci_compare(inode, name, , false);
-}
-
-static int ext4_d_hash(const struct dentry *dentry, struct qstr *str)
-{
-   const struct ext4_sb_info *sbi = EXT4_SB(dentry->d_sb);
-   const struct unicode_map *um = sbi->s_encoding;
-   const struct inode *inode = READ_ONCE(dentry->d_inode);
-   unsigned char *norm;
-   int len, ret = 0;
-
-   if (!inode || !IS_CASEFOLDED(inode) || !um)
-   return 0;
-
-   norm = kmalloc(PATH_MAX, GFP_ATOMIC);
-   if (!norm)
-   return -ENOMEM;
-
-   len = utf8_casefold(um, str, norm, PATH_MAX);
-   if (len < 0) {
-   if (ext4_has_strict_mode(sbi))
-   ret = -EINVAL;
-   goto out;
-   }
-   str->hash = full_name_hash(dentry, norm, len);
-out:
-   kfree(norm);
-   return ret;
-}
-
 const struct dentry_operations ext4_dentry_ops = {
-   .d_hash = ext4_d_hash,
-   .d_compare = ext4_d_compare,
+   .d_hash = generic_ci_d_hash,
+   .d_compare = generic_ci_d_compare,
 };
 #endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 42f5060f3cdf1..5cd8be24a4fd9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1393,14 +1393,6 @@ struct ext4_super_block {
 
 #define EXT4_ENC_UTF8_12_1 1
 
-/*
- * Flags for ext4_sb_info.s_encoding_flags.
- */
-#define EXT4_ENC_STRICT_MODE_FL(1 << 0)
-
-#define ext4_has_strict_mode(sbi) \
-   (sbi->s_encoding_flags & EXT4_ENC_STRICT_MODE_FL)
-
 /*
  * fourth extended-fs super-block data in memory
  */
@@ -1450,10 +1442,6 @@ struct ext4_sb_info {
struct kobject s_kobj;
struct completion s_kobj_unregister;
struct super_block *s_sb;
-#ifdef CONFIG_UNICODE
-   struct unicode_map *s_encoding;
-   __u16 s_encoding_flags;
-#endif
 
/* Journaling */
struct journal_s *s_journal;
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 3e133793a5a34..143b0073b3f46 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -275,7 +275,7 @@ int ext4fs_dirhash(const struct inode *dir, const char 
*name, int len,
   struct dx_hash_info *hinfo)
 {
 #ifdef CONFIG_UNICODE
-   const struct unicode_map *um = EXT4_SB(dir->i_sb)->s_encoding;
+   const struct unicode_map *um = dir->i_sb->s_encoding;
int r, dlen;
unsigned char *buff;
struct qstr qstr = {.name = name, .len = len };
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 56738b538ddf4..7e9fb77fd2cc7 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1286,8 +1286,8 @@ static void dx_insert_block(struct dx_frame *frame, u32 
hash, ext4_lblk_t block)
 int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
const struct qstr *entry, bool quick)
 {
-   const struct ext4_sb_info *sbi = EXT4_SB(parent->i_sb);
-   const struct unicode_map *um = sbi->s_encoding;
+   const struct super_block *sb = parent->i_sb;
+   const struct unicode_map *u

[PATCH v9 0/4] Prepare for upcoming Casefolding/Encryption patches

2020-06-23 Thread Daniel Rosenberg
This lays the groundwork for enabling casefolding and encryption at the
same time for ext4 and f2fs. A future set of patches will enable that
functionality. These unify the highly similar dentry_operations that ext4
and f2fs both use for casefolding.

Daniel Rosenberg (4):
  unicode: Add utf8_casefold_hash
  fs: Add standard casefolding support
  f2fs: Use generic casefolding support
  ext4: Use generic casefolding support

 fs/ext4/dir.c   |  64 +
 fs/ext4/ext4.h  |  12 -
 fs/ext4/hash.c  |   2 +-
 fs/ext4/namei.c |  20 
 fs/ext4/super.c |  12 ++---
 fs/f2fs/dir.c   |  84 -
 fs/f2fs/f2fs.h  |   4 --
 fs/f2fs/super.c |  10 ++--
 fs/f2fs/sysfs.c |  10 ++--
 fs/libfs.c  | 101 
 fs/unicode/utf8-core.c  |  23 -
 include/linux/f2fs_fs.h |   3 --
 include/linux/fs.h  |  22 +
 include/linux/unicode.h |   3 ++
 14 files changed, 186 insertions(+), 184 deletions(-)

-- 
2.27.0.111.gc72c7da667-goog



[PATCH v9 2/4] fs: Add standard casefolding support

2020-06-23 Thread Daniel Rosenberg
This adds general supporting functions for filesystems that use
utf8 casefolding. It provides standard dentry_operations and adds the
necessary structures in struct super_block to allow this standardization.

Ext4 and F2fs will switch to these common implementations.

Signed-off-by: Daniel Rosenberg 
---
 fs/libfs.c | 101 +
 include/linux/fs.h |  22 ++
 2 files changed, 123 insertions(+)

diff --git a/fs/libfs.c b/fs/libfs.c
index 4d08edf19c782..f7345a5ed562f 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -20,6 +20,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 
@@ -1363,3 +1365,102 @@ bool is_empty_dir_inode(struct inode *inode)
return (inode->i_fop == _dir_operations) &&
(inode->i_op == _dir_inode_operations);
 }
+
+#ifdef CONFIG_UNICODE
+/**
+ * needs_casefold - generic helper to determine if a filename should be 
casefolded
+ * @dir: Parent directory
+ *
+ * Generic helper for filesystems to use to determine if the name of a dentry
+ * should be casefolded. It does not make sense to casefold the no-key token of
+ * an encrypted filename.
+ *
+ * Return: if names will need casefolding
+ */
+bool needs_casefold(const struct inode *dir)
+{
+   return IS_CASEFOLDED(dir) && dir->i_sb->s_encoding &&
+   (!IS_ENCRYPTED(dir) || fscrypt_has_encryption_key(dir));
+}
+EXPORT_SYMBOL(needs_casefold);
+
+/**
+ * generic_ci_d_compare - generic d_compare implementation for casefolding 
filesystems
+ * @dentry:dentry whose name we are checking against
+ * @len:   len of name of dentry
+ * @str:   str pointer to name of dentry
+ * @name:  Name to compare against
+ *
+ * Return: 0 if names match, 1 if mismatch, or -ERRNO
+ */
+int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
+ const char *str, const struct qstr *name)
+{
+   const struct dentry *parent = READ_ONCE(dentry->d_parent);
+   const struct inode *inode = READ_ONCE(parent->d_inode);
+   const struct super_block *sb = dentry->d_sb;
+   const struct unicode_map *um = sb->s_encoding;
+   struct qstr qstr = QSTR_INIT(str, len);
+   char strbuf[DNAME_INLINE_LEN];
+   int ret;
+
+   if (!inode || !needs_casefold(inode))
+   goto fallback;
+   /*
+* If the dentry name is stored in-line, then it may be concurrently
+* modified by a rename.  If this happens, the VFS will eventually retry
+* the lookup, so it doesn't matter what ->d_compare() returns.
+* However, it's unsafe to call utf8_strncasecmp() with an unstable
+* string.  Therefore, we have to copy the name into a temporary buffer.
+*/
+   if (len <= DNAME_INLINE_LEN - 1) {
+   memcpy(strbuf, str, len);
+   strbuf[len] = 0;
+   qstr.name = strbuf;
+   /* prevent compiler from optimizing out the temporary buffer */
+   barrier();
+   }
+   ret = utf8_strncasecmp(um, name, );
+   if (ret >= 0)
+   return ret;
+
+   if (sb_has_enc_strict_mode(sb))
+   return -EINVAL;
+fallback:
+   if (len != name->len)
+   return 1;
+   return !!memcmp(str, name->name, len);
+}
+EXPORT_SYMBOL(generic_ci_d_compare);
+
+/**
+ * generic_ci_d_hash - generic d_hash implementation for casefolding 
filesystems
+ * @dentry:dentry whose name we are hashing
+ * @str:   qstr of name whose hash we should fill in
+ *
+ * Return: 0 if hash was successful, or -ERRNO
+ */
+int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
+{
+   const struct inode *inode = READ_ONCE(dentry->d_inode);
+   struct super_block *sb = dentry->d_sb;
+   const struct unicode_map *um = sb->s_encoding;
+   int ret = 0;
+
+   if (!inode || !needs_casefold(inode))
+   return 0;
+
+   ret = utf8_casefold_hash(um, dentry, str);
+   if (ret < 0)
+   goto err;
+
+   return 0;
+err:
+   if (sb_has_enc_strict_mode(sb))
+   ret = -EINVAL;
+   else
+   ret = 0;
+   return ret;
+}
+EXPORT_SYMBOL(generic_ci_d_hash);
+#endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f881a892ea74..261904e06873b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1392,6 +1392,12 @@ extern int send_sigurg(struct fown_struct *fown);
 #define SB_ACTIVE  (1<<30)
 #define SB_NOUSER  (1<<31)
 
+/* These flags relate to encoding and casefolding */
+#define SB_ENC_STRICT_MODE_FL  (1 << 0)
+
+#define sb_has_enc_strict_mode(sb) \
+   (sb->s_encoding_flags & SB_ENC_STRICT_MODE_FL)
+
 /*
  * Umount options
  */
@@ -1461,6 +1467,10 @@ struct super_block {
 #endif
 #ifdef CONFIG_FS_VERITY
const struct fsverity_operations *s_vop;
+#endif
+#

Re: [PATCH] staging: android: ion: check for kref overflow

2018-08-31 Thread Daniel Rosenberg




On 08/31/2018 08:56 AM, Greg Kroah-Hartman wrote:

On Thu, Aug 30, 2018 at 06:36:18PM -0700, Daniel Rosenberg wrote:

The sign off was on the 4.4.y version that I cherry-picked this from.

Ah that wasn't obvious at all.  What is that git commit id?  You need to
give us a hint as to what is going on when you do that :)

b84ec04bae905901 ("staging: android: ion: check for kref overflow") in 4.4.y

There was a trivial conflict moving it to 4.9, but it did not modify
any changed lines, so I hadn't thought that was worth noting on the
patch. I apologise if leaving the signed-off-by was incorrect here.

Why did I only apply this to 4.4 and not 4.9 when the original patch was
submitted?  That seems odd.

thanks,

greg k-h
I don't know. I had included it in the range of kernel versions it 
should be applied to in the original patch, and noted the minor conflict 
for later kernel versions. You added it in 3.18 and 4.4, and I assumed 
not 4.9 because of the conflict in applying the patch, so I sent this 
version.


b1fa6d8acb50c8e9 ("staging: android: ion: Pull out ion ioctls to a 
separate file") is the patch that causes the minor conflict in applying 
the original patch.
4c23cbff073f3b9b ("staging: android: ion: Remove import interface") is 
the patch that removes the affected code altogether in later kernel 
versions.


Re: [PATCH] staging: android: ion: check for kref overflow

2018-08-31 Thread Daniel Rosenberg




On 08/31/2018 08:56 AM, Greg Kroah-Hartman wrote:

On Thu, Aug 30, 2018 at 06:36:18PM -0700, Daniel Rosenberg wrote:

The sign off was on the 4.4.y version that I cherry-picked this from.

Ah that wasn't obvious at all.  What is that git commit id?  You need to
give us a hint as to what is going on when you do that :)

b84ec04bae905901 ("staging: android: ion: check for kref overflow") in 4.4.y

There was a trivial conflict moving it to 4.9, but it did not modify
any changed lines, so I hadn't thought that was worth noting on the
patch. I apologise if leaving the signed-off-by was incorrect here.

Why did I only apply this to 4.4 and not 4.9 when the original patch was
submitted?  That seems odd.

thanks,

greg k-h
I don't know. I had included it in the range of kernel versions it 
should be applied to in the original patch, and noted the minor conflict 
for later kernel versions. You added it in 3.18 and 4.4, and I assumed 
not 4.9 because of the conflict in applying the patch, so I sent this 
version.


b1fa6d8acb50c8e9 ("staging: android: ion: Pull out ion ioctls to a 
separate file") is the patch that causes the minor conflict in applying 
the original patch.
4c23cbff073f3b9b ("staging: android: ion: Remove import interface") is 
the patch that removes the affected code altogether in later kernel 
versions.


Re: [PATCH] staging: android: ion: check for kref overflow

2018-08-30 Thread Daniel Rosenberg

On 08/30/2018 05:41 PM, Greg Kroah-Hartman wrote:

On Thu, Aug 30, 2018 at 04:09:46PM -0700, Daniel Rosenberg wrote:

This patch is against 4.9. It does not apply to master due to a large
rework of ion in 4.12 which removed the affected functions altogether.
4c23cbff073f3b9b ("staging: android: ion: Remove import interface")

Userspace can cause the kref to handles to increment
arbitrarily high. Ensure it does not overflow.

Signed-off-by: Daniel Rosenberg 
Signed-off-by: Greg Kroah-Hartman 

I signed off on this?  Where?  When?  Are you sure?

greg k-h
The sign off was on the 4.4.y version that I cherry-picked this from. 
There was a trivial conflict moving it to 4.9, but it did not modify any 
changed lines, so I hadn't thought that was worth noting on the patch. I 
apologise if leaving the signed-off-by was incorrect here.


-Daniel


Re: [PATCH] staging: android: ion: check for kref overflow

2018-08-30 Thread Daniel Rosenberg

On 08/30/2018 05:41 PM, Greg Kroah-Hartman wrote:

On Thu, Aug 30, 2018 at 04:09:46PM -0700, Daniel Rosenberg wrote:

This patch is against 4.9. It does not apply to master due to a large
rework of ion in 4.12 which removed the affected functions altogether.
4c23cbff073f3b9b ("staging: android: ion: Remove import interface")

Userspace can cause the kref to handles to increment
arbitrarily high. Ensure it does not overflow.

Signed-off-by: Daniel Rosenberg 
Signed-off-by: Greg Kroah-Hartman 

I signed off on this?  Where?  When?  Are you sure?

greg k-h
The sign off was on the 4.4.y version that I cherry-picked this from. 
There was a trivial conflict moving it to 4.9, but it did not modify any 
changed lines, so I hadn't thought that was worth noting on the patch. I 
apologise if leaving the signed-off-by was incorrect here.


-Daniel


[PATCH] staging: android: ion: check for kref overflow

2018-08-30 Thread Daniel Rosenberg
This patch is against 4.9. It does not apply to master due to a large
rework of ion in 4.12 which removed the affected functions altogether.
4c23cbff073f3b9b ("staging: android: ion: Remove import interface")

Userspace can cause the kref to handles to increment
arbitrarily high. Ensure it does not overflow.

Signed-off-by: Daniel Rosenberg 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/android/ion/ion.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/android/ion/ion.c 
b/drivers/staging/android/ion/ion.c
index 6f9974cb0e152..48821948fa487 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -15,6 +15,7 @@
  *
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -305,6 +306,16 @@ static void ion_handle_get(struct ion_handle *handle)
kref_get(>ref);
 }
 
+/* Must hold the client lock */
+static struct ion_handle *ion_handle_get_check_overflow(
+   struct ion_handle *handle)
+{
+   if (atomic_read(>ref.refcount) + 1 == 0)
+   return ERR_PTR(-EOVERFLOW);
+   ion_handle_get(handle);
+   return handle;
+}
+
 int ion_handle_put_nolock(struct ion_handle *handle)
 {
return kref_put(>ref, ion_handle_destroy);
@@ -347,9 +358,9 @@ struct ion_handle *ion_handle_get_by_id_nolock(struct 
ion_client *client,
 
handle = idr_find(>idr, id);
if (handle)
-   ion_handle_get(handle);
+   return ion_handle_get_check_overflow(handle);
 
-   return handle ? handle : ERR_PTR(-EINVAL);
+   return ERR_PTR(-EINVAL);
 }
 
 struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
@@ -1100,7 +,7 @@ struct ion_handle *ion_import_dma_buf(struct ion_client 
*client,
/* if a handle exists for this buffer just take a reference to it */
handle = ion_handle_lookup(client, buffer);
if (!IS_ERR(handle)) {
-   ion_handle_get(handle);
+   handle = ion_handle_get_check_overflow(handle);
mutex_unlock(>lock);
goto end;
}
-- 
2.19.0.rc0.228.g281dcd1b4d0-goog



[PATCH] staging: android: ion: check for kref overflow

2018-08-30 Thread Daniel Rosenberg
This patch is against 4.9. It does not apply to master due to a large
rework of ion in 4.12 which removed the affected functions altogether.
4c23cbff073f3b9b ("staging: android: ion: Remove import interface")

Userspace can cause the kref to handles to increment
arbitrarily high. Ensure it does not overflow.

Signed-off-by: Daniel Rosenberg 
Signed-off-by: Greg Kroah-Hartman 
---
 drivers/staging/android/ion/ion.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/android/ion/ion.c 
b/drivers/staging/android/ion/ion.c
index 6f9974cb0e152..48821948fa487 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -15,6 +15,7 @@
  *
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -305,6 +306,16 @@ static void ion_handle_get(struct ion_handle *handle)
kref_get(>ref);
 }
 
+/* Must hold the client lock */
+static struct ion_handle *ion_handle_get_check_overflow(
+   struct ion_handle *handle)
+{
+   if (atomic_read(>ref.refcount) + 1 == 0)
+   return ERR_PTR(-EOVERFLOW);
+   ion_handle_get(handle);
+   return handle;
+}
+
 int ion_handle_put_nolock(struct ion_handle *handle)
 {
return kref_put(>ref, ion_handle_destroy);
@@ -347,9 +358,9 @@ struct ion_handle *ion_handle_get_by_id_nolock(struct 
ion_client *client,
 
handle = idr_find(>idr, id);
if (handle)
-   ion_handle_get(handle);
+   return ion_handle_get_check_overflow(handle);
 
-   return handle ? handle : ERR_PTR(-EINVAL);
+   return ERR_PTR(-EINVAL);
 }
 
 struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
@@ -1100,7 +,7 @@ struct ion_handle *ion_import_dma_buf(struct ion_client 
*client,
/* if a handle exists for this buffer just take a reference to it */
handle = ion_handle_lookup(client, buffer);
if (!IS_ERR(handle)) {
-   ion_handle_get(handle);
+   handle = ion_handle_get_check_overflow(handle);
mutex_unlock(>lock);
goto end;
}
-- 
2.19.0.rc0.228.g281dcd1b4d0-goog



[PATCH v2] staging: android: ion: check for kref overflow

2018-08-21 Thread Daniel Rosenberg
Userspace can cause the kref to handles to increment
arbitrarily high. Ensure it does not overflow.

Signed-off-by: Daniel Rosenberg 
---
v2: Fixed patch corruption :(

This patch is against 4.4. It does not apply to master due to a large
rework of ion in 4.12 which removed the affected functions altogether.
4c23cbff073f3b9b ("staging: android: ion: Remove import interface")

It applies from 3.18 to 4.11, although with a trivial conflict resolution
for the later branches.
 drivers/staging/android/ion/ion.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/android/ion/ion.c 
b/drivers/staging/android/ion/ion.c
index 374f840f31a48..47cb163da9a07 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -15,6 +15,7 @@
  *
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -387,6 +388,16 @@ static void ion_handle_get(struct ion_handle *handle)
kref_get(>ref);
 }
 
+/* Must hold the client lock */
+static struct ion_handle *ion_handle_get_check_overflow(
+   struct ion_handle *handle)
+{
+   if (atomic_read(>ref.refcount) + 1 == 0)
+   return ERR_PTR(-EOVERFLOW);
+   ion_handle_get(handle);
+   return handle;
+}
+
 static int ion_handle_put_nolock(struct ion_handle *handle)
 {
int ret;
@@ -433,9 +444,9 @@ static struct ion_handle 
*ion_handle_get_by_id_nolock(struct ion_client *client,
 
handle = idr_find(>idr, id);
if (handle)
-   ion_handle_get(handle);
+   return ion_handle_get_check_overflow(handle);
 
-   return handle ? handle : ERR_PTR(-EINVAL);
+   return ERR_PTR(-EINVAL);
 }
 
 struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
@@ -1202,7 +1213,7 @@ struct ion_handle *ion_import_dma_buf(struct ion_client 
*client, int fd)
/* if a handle exists for this buffer just take a reference to it */
handle = ion_handle_lookup(client, buffer);
if (!IS_ERR(handle)) {
-   ion_handle_get(handle);
+   handle = ion_handle_get_check_overflow(handle);
mutex_unlock(>lock);
goto end;
}
-- 
2.18.0.865.gffc8e1a3cd6-goog



[PATCH v2] staging: android: ion: check for kref overflow

2018-08-21 Thread Daniel Rosenberg
Userspace can cause the kref to handles to increment
arbitrarily high. Ensure it does not overflow.

Signed-off-by: Daniel Rosenberg 
---
v2: Fixed patch corruption :(

This patch is against 4.4. It does not apply to master due to a large
rework of ion in 4.12 which removed the affected functions altogether.
4c23cbff073f3b9b ("staging: android: ion: Remove import interface")

It applies from 3.18 to 4.11, although with a trivial conflict resolution
for the later branches.
 drivers/staging/android/ion/ion.c | 17 ++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/android/ion/ion.c 
b/drivers/staging/android/ion/ion.c
index 374f840f31a48..47cb163da9a07 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -15,6 +15,7 @@
  *
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -387,6 +388,16 @@ static void ion_handle_get(struct ion_handle *handle)
kref_get(>ref);
 }
 
+/* Must hold the client lock */
+static struct ion_handle *ion_handle_get_check_overflow(
+   struct ion_handle *handle)
+{
+   if (atomic_read(>ref.refcount) + 1 == 0)
+   return ERR_PTR(-EOVERFLOW);
+   ion_handle_get(handle);
+   return handle;
+}
+
 static int ion_handle_put_nolock(struct ion_handle *handle)
 {
int ret;
@@ -433,9 +444,9 @@ static struct ion_handle 
*ion_handle_get_by_id_nolock(struct ion_client *client,
 
handle = idr_find(>idr, id);
if (handle)
-   ion_handle_get(handle);
+   return ion_handle_get_check_overflow(handle);
 
-   return handle ? handle : ERR_PTR(-EINVAL);
+   return ERR_PTR(-EINVAL);
 }
 
 struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
@@ -1202,7 +1213,7 @@ struct ion_handle *ion_import_dma_buf(struct ion_client 
*client, int fd)
/* if a handle exists for this buffer just take a reference to it */
handle = ion_handle_lookup(client, buffer);
if (!IS_ERR(handle)) {
-   ion_handle_get(handle);
+   handle = ion_handle_get_check_overflow(handle);
mutex_unlock(>lock);
goto end;
}
-- 
2.18.0.865.gffc8e1a3cd6-goog



[PATCH] staging: android: ion: check for kref overflow

2018-08-20 Thread Daniel Rosenberg
Userspace can cause the kref to handles to increment
arbitrarily high. Ensure it does not overflow.

Signed-off-by: Daniel Rosenberg 
---

This patch is against 4.4. It does not apply to master due to a large
rework of ion in 4.12 which removed the affected functions altogether.
It applies from 3.18 to 4.11, although with a trivial conflict resolution
for the later branches.
4c23cbff073f3b9b ("staging: android: ion: Remove import interface")

 drivers/staging/android/ion/ion.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/android/ion/ion.c 
b/drivers/staging/android/ion/ion.c
index 374f840f31a48..11f93a6314fdb 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -15,6 +15,7 @@
  *
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -387,6 +388,15 @@ static void ion_handle_get(struct ion_handle *handle)
kref_get(>ref);
 }
 
+/* Must hold the client lock */
+static struct ion_handle *ion_handle_get_check_overflow(
+   struct ion_handle *handle)
+{
+   if (atomic_read(>ref.refcount) + 1 == 0)
+   return ERR_PTR(-EOVERFLOW);
+   ion_handle_get(handle);
+   return handle;
+}
+
 static int ion_handle_put_nolock(struct ion_handle *handle)
 {
int ret;
@@ -433,9 +443,9 @@ static struct ion_handle 
*ion_handle_get_by_id_nolock(struct ion_client *client,
 
handle = idr_find(>idr, id);
if (handle)
-   ion_handle_get(handle);
+   return ion_handle_get_check_overflow(handle);
 
-   return handle ? handle : ERR_PTR(-EINVAL);
+   return ERR_PTR(-EINVAL);
 }
 
 struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
@@ -1202,7 +1212,7 @@ struct ion_handle *ion_import_dma_buf(struct ion_client 
*client, int fd)
/* if a handle exists for this buffer just take a reference to it */
handle = ion_handle_lookup(client, buffer);
if (!IS_ERR(handle)) {
-   ion_handle_get(handle);
+   handle = ion_handle_get_check_overflow(handle);
mutex_unlock(>lock);
goto end;
}
-- 
2.18.0.865.gffc8e1a3cd6-goog



[PATCH] staging: android: ion: check for kref overflow

2018-08-20 Thread Daniel Rosenberg
Userspace can cause the kref to handles to increment
arbitrarily high. Ensure it does not overflow.

Signed-off-by: Daniel Rosenberg 
---

This patch is against 4.4. It does not apply to master due to a large
rework of ion in 4.12 which removed the affected functions altogether.
It applies from 3.18 to 4.11, although with a trivial conflict resolution
for the later branches.
4c23cbff073f3b9b ("staging: android: ion: Remove import interface")

 drivers/staging/android/ion/ion.c | 16 +---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/android/ion/ion.c 
b/drivers/staging/android/ion/ion.c
index 374f840f31a48..11f93a6314fdb 100644
--- a/drivers/staging/android/ion/ion.c
+++ b/drivers/staging/android/ion/ion.c
@@ -15,6 +15,7 @@
  *
  */
 
+#include 
 #include 
 #include 
 #include 
@@ -387,6 +388,15 @@ static void ion_handle_get(struct ion_handle *handle)
kref_get(>ref);
 }
 
+/* Must hold the client lock */
+static struct ion_handle *ion_handle_get_check_overflow(
+   struct ion_handle *handle)
+{
+   if (atomic_read(>ref.refcount) + 1 == 0)
+   return ERR_PTR(-EOVERFLOW);
+   ion_handle_get(handle);
+   return handle;
+}
+
 static int ion_handle_put_nolock(struct ion_handle *handle)
 {
int ret;
@@ -433,9 +443,9 @@ static struct ion_handle 
*ion_handle_get_by_id_nolock(struct ion_client *client,
 
handle = idr_find(>idr, id);
if (handle)
-   ion_handle_get(handle);
+   return ion_handle_get_check_overflow(handle);
 
-   return handle ? handle : ERR_PTR(-EINVAL);
+   return ERR_PTR(-EINVAL);
 }
 
 struct ion_handle *ion_handle_get_by_id(struct ion_client *client,
@@ -1202,7 +1212,7 @@ struct ion_handle *ion_import_dma_buf(struct ion_client 
*client, int fd)
/* if a handle exists for this buffer just take a reference to it */
handle = ion_handle_lookup(client, buffer);
if (!IS_ERR(handle)) {
-   ion_handle_get(handle);
+   handle = ion_handle_get_check_overflow(handle);
mutex_unlock(>lock);
goto end;
}
-- 
2.18.0.865.gffc8e1a3cd6-goog



[PATCH] HID: debug: check length before copy_to_user()

2018-07-02 Thread Daniel Rosenberg
If the length to copy is greater than the size of the user buffer, we
overflow that buffer.

Signed-off-by: Daniel Rosenberg 
Cc: sta...@vger.kernel.org
---
 drivers/hid/hid-debug.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 8469b6964ff64..b48100236df89 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -1154,6 +1154,8 @@ static ssize_t hid_debug_events_read(struct file *file, 
char __user *buffer,
goto out;
if (list->tail > list->head) {
len = list->tail - list->head;
+   if (len > count)
+   len = count;
 
if (copy_to_user(buffer + ret, 
>hid_debug_buf[list->head], len)) {
ret = -EFAULT;
@@ -1163,6 +1165,8 @@ static ssize_t hid_debug_events_read(struct file *file, 
char __user *buffer,
list->head += len;
} else {
len = HID_DEBUG_BUFSIZE - list->head;
+   if (len > count)
+   len = count;
 
if (copy_to_user(buffer, 
>hid_debug_buf[list->head], len)) {
ret = -EFAULT;
@@ -1170,7 +1174,9 @@ static ssize_t hid_debug_events_read(struct file *file, 
char __user *buffer,
}
list->head = 0;
ret += len;
-   goto copy_rest;
+   count -= len;
+   if (count > 0)
+   goto copy_rest;
}
 
}
-- 
2.18.0.399.gad0ab374a1-goog



[PATCH] HID: debug: check length before copy_to_user()

2018-07-02 Thread Daniel Rosenberg
If the length to copy is greater than the size of the user buffer, we
overflow that buffer.

Signed-off-by: Daniel Rosenberg 
Cc: sta...@vger.kernel.org
---
 drivers/hid/hid-debug.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c
index 8469b6964ff64..b48100236df89 100644
--- a/drivers/hid/hid-debug.c
+++ b/drivers/hid/hid-debug.c
@@ -1154,6 +1154,8 @@ static ssize_t hid_debug_events_read(struct file *file, 
char __user *buffer,
goto out;
if (list->tail > list->head) {
len = list->tail - list->head;
+   if (len > count)
+   len = count;
 
if (copy_to_user(buffer + ret, 
>hid_debug_buf[list->head], len)) {
ret = -EFAULT;
@@ -1163,6 +1165,8 @@ static ssize_t hid_debug_events_read(struct file *file, 
char __user *buffer,
list->head += len;
} else {
len = HID_DEBUG_BUFSIZE - list->head;
+   if (len > count)
+   len = count;
 
if (copy_to_user(buffer, 
&list->hid_debug_buf[list->head], len)) {
ret = -EFAULT;
@@ -1170,7 +1174,9 @@ static ssize_t hid_debug_events_read(struct file *file, 
char __user *buffer,
}
list->head = 0;
ret += len;
-   goto copy_rest;
+   count -= len;
+   if (count > 0)
+   goto copy_rest;
}
 
}
-- 
2.18.0.399.gad0ab374a1-goog



[PATCH] drivers: dma-buf: Change %p to %pK in debug messages

2018-05-02 Thread Daniel Rosenberg
The format specifier %p can leak kernel addresses
while not honoring the kptr_restrict system setting.
Use %pK instead of %p, which also evaluates whether
kptr_restrict is set.

Signed-off-by: Divya Ponnusamy <pdi...@codeaurora.org>
Signed-off-by: Daniel Rosenberg <dro...@google.com>
Cc: stable <sta...@vger.kernel.org>
---
 drivers/dma-buf/sync_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c
index c4c8ecb24aa9..d8d340542a79 100644
--- a/drivers/dma-buf/sync_debug.c
+++ b/drivers/dma-buf/sync_debug.c
@@ -133,7 +133,7 @@ static void sync_print_sync_file(struct seq_file *s,
char buf[128];
int i;
 
-   seq_printf(s, "[%p] %s: %s\n", sync_file,
+   seq_printf(s, "[%pK] %s: %s\n", sync_file,
   sync_file_get_name(sync_file, buf, sizeof(buf)),
   sync_status_str(dma_fence_get_status(sync_file->fence)));
 
-- 
2.17.0.441.gb46fe60e1d-goog



[PATCH] drivers: dma-buf: Change %p to %pK in debug messages

2018-05-02 Thread Daniel Rosenberg
The format specifier %p can leak kernel addresses
while not honoring the kptr_restrict system setting.
Use %pK instead of %p, which also evaluates whether
kptr_restrict is set.

Signed-off-by: Divya Ponnusamy 
Signed-off-by: Daniel Rosenberg 
Cc: stable 
---
 drivers/dma-buf/sync_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma-buf/sync_debug.c b/drivers/dma-buf/sync_debug.c
index c4c8ecb24aa9..d8d340542a79 100644
--- a/drivers/dma-buf/sync_debug.c
+++ b/drivers/dma-buf/sync_debug.c
@@ -133,7 +133,7 @@ static void sync_print_sync_file(struct seq_file *s,
char buf[128];
int i;
 
-   seq_printf(s, "[%p] %s: %s\n", sync_file,
+   seq_printf(s, "[%pK] %s: %s\n", sync_file,
   sync_file_get_name(sync_file, buf, sizeof(buf)),
   sync_status_str(dma_fence_get_status(sync_file->fence)));
 
-- 
2.17.0.441.gb46fe60e1d-goog



[PATCH 4/5] Squashfs: implement .readpages()

2017-09-22 Thread Daniel Rosenberg
From: Adrien Schildknecht <adrien+...@schischi.me>

Squashfs does not implement .readpages(), so the kernel just repeatedly
calls .readpage().

The readpages function tries to pack as many pages as possible in the
same page actor so that only 1 read request is issued.

Now that the read requests are asynchronous, the kernel can truly
prefetch pages using its readahead algorithm.

Signed-off-by: Adrien Schildknecht <adri...@google.com>
Signed-off-by: Daniel Rosenberg <dro...@google.com>
---
 fs/squashfs/file.c| 137 ++
 fs/squashfs/file_direct.c |  62 ++---
 fs/squashfs/squashfs.h|   5 +-
 3 files changed, 146 insertions(+), 58 deletions(-)

diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 13d80947bf9e..bb2e77ee4209 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -47,6 +47,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
@@ -438,6 +439,21 @@ static int squashfs_readpage_fragment(struct page *page)
return res;
 }
 
+static int squashfs_readpages_fragment(struct page *page,
+   struct list_head *readahead_pages, struct address_space *mapping)
+{
+   if (!page) {
+   page = lru_to_page(readahead_pages);
+   list_del(&page->lru);
+   if (add_to_page_cache_lru(page, mapping, page->index,
+   mapping_gfp_constraint(mapping, GFP_KERNEL))) {
+   put_page(page);
+   return 0;
+   }
+   }
+   return squashfs_readpage_fragment(page);
+}
+
 static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
 {
struct inode *inode = page->mapping->host;
@@ -450,54 +466,105 @@ static int squashfs_readpage_sparse(struct page *page, 
int index, int file_end)
return 0;
 }
 
-static int squashfs_readpage(struct file *file, struct page *page)
+static int squashfs_readpages_sparse(struct page *page,
+   struct list_head *readahead_pages, int index, int file_end,
+   struct address_space *mapping)
 {
-   struct inode *inode = page->mapping->host;
+   if (!page) {
+   page = lru_to_page(readahead_pages);
+   list_del(&page->lru);
+   if (add_to_page_cache_lru(page, mapping, page->index,
+   mapping_gfp_constraint(mapping, GFP_KERNEL))) {
+   put_page(page);
+   return 0;
+   }
+   }
+   return squashfs_readpage_sparse(page, index, file_end);
+}
+
+static int __squashfs_readpages(struct file *file, struct page *page,
+   struct list_head *readahead_pages, unsigned int nr_pages,
+   struct address_space *mapping)
+{
+   struct inode *inode = mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-   int index = page->index >> (msblk->block_log - PAGE_SHIFT);
int file_end = i_size_read(inode) >> msblk->block_log;
int res;
-   void *pageaddr;
 
-   TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
-   page->index, squashfs_i(inode)->start);
+   do {
+   struct page *cur_page = page ? page
+: lru_to_page(readahead_pages);
+   int page_index = cur_page->index;
+   int index = page_index >> (msblk->block_log - PAGE_SHIFT);
+
+   if (page_index >= ((i_size_read(inode) + PAGE_SIZE - 1) >>
+   PAGE_SHIFT))
+   return 1;
+
+   if (index < file_end || squashfs_i(inode)->fragment_block ==
+   SQUASHFS_INVALID_BLK) {
+   u64 block = 0;
+   int bsize = read_blocklist(inode, index, &block);
+
+   if (bsize < 0)
+   return -1;
+
+   if (bsize == 0) {
+   res = squashfs_readpages_sparse(page,
+   readahead_pages, index, file_end,
+   mapping);
+   } else {
+   res = squashfs_readpages_block(page,
+   readahead_pages, &nr_pages, mapping,
+   page_index, block, bsize);
+   }
+   } else {
+   res = squashfs_readpages_fragment(page,
+   readahead_pages, mapping);
+   }
+   if (res)
+   return 0;
+   page = NULL;
+   } while (readahead_pages && !list_empty(readahead_pages));
+
+   return 0;
+}
+
+static

[PATCH 4/5] Squashfs: implement .readpages()

2017-09-22 Thread Daniel Rosenberg
From: Adrien Schildknecht 

Squashfs does not implement .readpages(), so the kernel just repeatedly
calls .readpage().

The readpages function tries to pack as many pages as possible in the
same page actor so that only 1 read request is issued.

Now that the read requests are asynchronous, the kernel can truly
prefetch pages using its readahead algorithm.

Signed-off-by: Adrien Schildknecht 
Signed-off-by: Daniel Rosenberg 
---
 fs/squashfs/file.c| 137 ++
 fs/squashfs/file_direct.c |  62 ++---
 fs/squashfs/squashfs.h|   5 +-
 3 files changed, 146 insertions(+), 58 deletions(-)

diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index 13d80947bf9e..bb2e77ee4209 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -47,6 +47,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
@@ -438,6 +439,21 @@ static int squashfs_readpage_fragment(struct page *page)
return res;
 }
 
+static int squashfs_readpages_fragment(struct page *page,
+   struct list_head *readahead_pages, struct address_space *mapping)
+{
+   if (!page) {
+   page = lru_to_page(readahead_pages);
+   list_del(&page->lru);
+   if (add_to_page_cache_lru(page, mapping, page->index,
+   mapping_gfp_constraint(mapping, GFP_KERNEL))) {
+   put_page(page);
+   return 0;
+   }
+   }
+   return squashfs_readpage_fragment(page);
+}
+
 static int squashfs_readpage_sparse(struct page *page, int index, int file_end)
 {
struct inode *inode = page->mapping->host;
@@ -450,54 +466,105 @@ static int squashfs_readpage_sparse(struct page *page, 
int index, int file_end)
return 0;
 }
 
-static int squashfs_readpage(struct file *file, struct page *page)
+static int squashfs_readpages_sparse(struct page *page,
+   struct list_head *readahead_pages, int index, int file_end,
+   struct address_space *mapping)
 {
-   struct inode *inode = page->mapping->host;
+   if (!page) {
+   page = lru_to_page(readahead_pages);
+   list_del(&page->lru);
+   if (add_to_page_cache_lru(page, mapping, page->index,
+   mapping_gfp_constraint(mapping, GFP_KERNEL))) {
+   put_page(page);
+   return 0;
+   }
+   }
+   return squashfs_readpage_sparse(page, index, file_end);
+}
+
+static int __squashfs_readpages(struct file *file, struct page *page,
+   struct list_head *readahead_pages, unsigned int nr_pages,
+   struct address_space *mapping)
+{
+   struct inode *inode = mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-   int index = page->index >> (msblk->block_log - PAGE_SHIFT);
int file_end = i_size_read(inode) >> msblk->block_log;
int res;
-   void *pageaddr;
 
-   TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
-   page->index, squashfs_i(inode)->start);
+   do {
+   struct page *cur_page = page ? page
+: lru_to_page(readahead_pages);
+   int page_index = cur_page->index;
+   int index = page_index >> (msblk->block_log - PAGE_SHIFT);
+
+   if (page_index >= ((i_size_read(inode) + PAGE_SIZE - 1) >>
+   PAGE_SHIFT))
+   return 1;
+
+   if (index < file_end || squashfs_i(inode)->fragment_block ==
+   SQUASHFS_INVALID_BLK) {
+   u64 block = 0;
+   int bsize = read_blocklist(inode, index, &block);
+
+   if (bsize < 0)
+   return -1;
+
+   if (bsize == 0) {
+   res = squashfs_readpages_sparse(page,
+   readahead_pages, index, file_end,
+   mapping);
+   } else {
+   res = squashfs_readpages_block(page,
+   readahead_pages, &nr_pages, mapping,
+   page_index, block, bsize);
+   }
+   } else {
+   res = squashfs_readpages_fragment(page,
+   readahead_pages, mapping);
+   }
+   if (res)
+   return 0;
+   page = NULL;
+   } while (readahead_pages && !list_empty(readahead_pages));
+
+   return 0;
+}
+
+static int squashfs_readpage(struct file *file, struct page *page)
+{
+   i

[PATCH 3/5] Squashfs: replace buffer_head with BIO

2017-09-22 Thread Daniel Rosenberg
From: Adrien Schildknecht <adri...@google.com>

The 'll_rw_block' has been deprecated and BIO is now the basic container
for block I/O within the kernel.

Switching to BIO offers 2 advantages:
  1/ It removes synchronous wait for the up-to-date buffers: SquashFS
 now deals with decompressions/copies asynchronously.
 Implementing an asynchronous mechanism to read data is needed to
 efficiently implement .readpages().
  2/ Prior to this patch, merging the read requests entirely depends on
 the IO scheduler. SquashFS has more information than the IO
 scheduler about what could be merged. Moreover, merging the reads
 at the FS level means that we rely less on the IO scheduler.

Signed-off-by: Adrien Schildknecht <adri...@google.com>
Signed-off-by: Daniel Rosenberg <dro...@google.com>
---
 fs/squashfs/block.c   | 522 +-
 fs/squashfs/file_direct.c | 195 ++---
 fs/squashfs/squashfs.h|   4 +
 fs/squashfs/super.c   |   7 +
 4 files changed, 456 insertions(+), 272 deletions(-)

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2751476e6b6e..252dfc82ae72 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -32,6 +32,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
@@ -39,45 +41,357 @@
 #include "decompressor.h"
 #include "page_actor.h"
 
-/*
- * Read the metadata block length, this is stored in the first two
- * bytes of the metadata block.
- */
-static struct buffer_head *get_block_length(struct super_block *sb,
-   u64 *cur_index, int *offset, int *length)
+static struct workqueue_struct *squashfs_read_wq;
+
+struct squashfs_read_request {
+   struct super_block *sb;
+   u64 index;
+   int length;
+   int compressed;
+   int offset;
+   u64 read_end;
+   struct squashfs_page_actor *output;
+   enum {
+   SQUASHFS_COPY,
+   SQUASHFS_DECOMPRESS,
+   SQUASHFS_METADATA,
+   } data_processing;
+   bool synchronous;
+
+   /*
+* If the read is synchronous, it is possible to retrieve information
+* about the request by setting these pointers.
+*/
+   int *res;
+   int *bytes_read;
+   int *bytes_uncompressed;
+
+   int nr_buffers;
+   struct buffer_head **bh;
+   struct work_struct offload;
+};
+
+struct squashfs_bio_request {
+   struct buffer_head **bh;
+   int nr_buffers;
+};
+
+static int squashfs_bio_submit(struct squashfs_read_request *req);
+
+int squashfs_init_read_wq(void)
+{
+   squashfs_read_wq = create_workqueue("SquashFS read wq");
+   return !!squashfs_read_wq;
+}
+
+void squashfs_destroy_read_wq(void)
 {
-   struct squashfs_sb_info *msblk = sb->s_fs_info;
+   flush_workqueue(squashfs_read_wq);
+   destroy_workqueue(squashfs_read_wq);
+}
+
+static void free_read_request(struct squashfs_read_request *req, int error)
+{
+   if (!req->synchronous)
+   squashfs_page_actor_free(req->output, error);
+   if (req->res)
+   *(req->res) = error;
+   kfree(req->bh);
+   kfree(req);
+}
+
+static void squashfs_process_blocks(struct squashfs_read_request *req)
+{
+   int error = 0;
+   int bytes, i, length;
+   struct squashfs_sb_info *msblk = req->sb->s_fs_info;
+   struct squashfs_page_actor *actor = req->output;
+   struct buffer_head **bh = req->bh;
+   int nr_buffers = req->nr_buffers;
+
+   for (i = 0; i < nr_buffers; ++i) {
+   if (!bh[i])
+   continue;
+   wait_on_buffer(bh[i]);
+   if (!buffer_uptodate(bh[i]))
+   error = -EIO;
+   }
+   if (error)
+   goto cleanup;
+
+   if (req->data_processing == SQUASHFS_METADATA) {
+   /* Extract the length of the metadata block */
+   if (req->offset != msblk->devblksize - 1) {
+   length = le16_to_cpup((__le16 *)
+   (bh[0]->b_data + req->offset));
+   } else {
+   length = (unsigned char)bh[0]->b_data[req->offset];
+   length |= (unsigned char)bh[1]->b_data[0] << 8;
+   }
+   req->compressed = SQUASHFS_COMPRESSED(length);
+   req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
+  : SQUASHFS_COPY;
+   length = SQUASHFS_COMPRESSED_SIZE(length);
+   if (req->index + length + 2 > req->read_end) {
+   for (i = 0; i < nr_buffers; ++i)
+   put_bh(bh[i]);
+   kfree(bh);
+   

[PATCH 3/5] Squashfs: replace buffer_head with BIO

2017-09-22 Thread Daniel Rosenberg
From: Adrien Schildknecht 

The 'll_rw_block' has been deprecated and BIO is now the basic container
for block I/O within the kernel.

Switching to BIO offers 2 advantages:
  1/ It removes synchronous wait for the up-to-date buffers: SquashFS
 now deals with decompressions/copies asynchronously.
 Implementing an asynchronous mechanism to read data is needed to
 efficiently implement .readpages().
  2/ Prior to this patch, merging the read requests entirely depends on
 the IO scheduler. SquashFS has more information than the IO
 scheduler about what could be merged. Moreover, merging the reads
 at the FS level means that we rely less on the IO scheduler.

Signed-off-by: Adrien Schildknecht 
Signed-off-by: Daniel Rosenberg 
---
 fs/squashfs/block.c   | 522 +-
 fs/squashfs/file_direct.c | 195 ++---
 fs/squashfs/squashfs.h|   4 +
 fs/squashfs/super.c   |   7 +
 4 files changed, 456 insertions(+), 272 deletions(-)

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 2751476e6b6e..252dfc82ae72 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -32,6 +32,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include "squashfs_fs.h"
 #include "squashfs_fs_sb.h"
@@ -39,45 +41,357 @@
 #include "decompressor.h"
 #include "page_actor.h"
 
-/*
- * Read the metadata block length, this is stored in the first two
- * bytes of the metadata block.
- */
-static struct buffer_head *get_block_length(struct super_block *sb,
-   u64 *cur_index, int *offset, int *length)
+static struct workqueue_struct *squashfs_read_wq;
+
+struct squashfs_read_request {
+   struct super_block *sb;
+   u64 index;
+   int length;
+   int compressed;
+   int offset;
+   u64 read_end;
+   struct squashfs_page_actor *output;
+   enum {
+   SQUASHFS_COPY,
+   SQUASHFS_DECOMPRESS,
+   SQUASHFS_METADATA,
+   } data_processing;
+   bool synchronous;
+
+   /*
+* If the read is synchronous, it is possible to retrieve information
+* about the request by setting these pointers.
+*/
+   int *res;
+   int *bytes_read;
+   int *bytes_uncompressed;
+
+   int nr_buffers;
+   struct buffer_head **bh;
+   struct work_struct offload;
+};
+
+struct squashfs_bio_request {
+   struct buffer_head **bh;
+   int nr_buffers;
+};
+
+static int squashfs_bio_submit(struct squashfs_read_request *req);
+
+int squashfs_init_read_wq(void)
+{
+   squashfs_read_wq = create_workqueue("SquashFS read wq");
+   return !!squashfs_read_wq;
+}
+
+void squashfs_destroy_read_wq(void)
 {
-   struct squashfs_sb_info *msblk = sb->s_fs_info;
+   flush_workqueue(squashfs_read_wq);
+   destroy_workqueue(squashfs_read_wq);
+}
+
+static void free_read_request(struct squashfs_read_request *req, int error)
+{
+   if (!req->synchronous)
+   squashfs_page_actor_free(req->output, error);
+   if (req->res)
+   *(req->res) = error;
+   kfree(req->bh);
+   kfree(req);
+}
+
+static void squashfs_process_blocks(struct squashfs_read_request *req)
+{
+   int error = 0;
+   int bytes, i, length;
+   struct squashfs_sb_info *msblk = req->sb->s_fs_info;
+   struct squashfs_page_actor *actor = req->output;
+   struct buffer_head **bh = req->bh;
+   int nr_buffers = req->nr_buffers;
+
+   for (i = 0; i < nr_buffers; ++i) {
+   if (!bh[i])
+   continue;
+   wait_on_buffer(bh[i]);
+   if (!buffer_uptodate(bh[i]))
+   error = -EIO;
+   }
+   if (error)
+   goto cleanup;
+
+   if (req->data_processing == SQUASHFS_METADATA) {
+   /* Extract the length of the metadata block */
+   if (req->offset != msblk->devblksize - 1) {
+   length = le16_to_cpup((__le16 *)
+   (bh[0]->b_data + req->offset));
+   } else {
+   length = (unsigned char)bh[0]->b_data[req->offset];
+   length |= (unsigned char)bh[1]->b_data[0] << 8;
+   }
+   req->compressed = SQUASHFS_COMPRESSED(length);
+   req->data_processing = req->compressed ? SQUASHFS_DECOMPRESS
+  : SQUASHFS_COPY;
+   length = SQUASHFS_COMPRESSED_SIZE(length);
+   if (req->index + length + 2 > req->read_end) {
+   for (i = 0; i < nr_buffers; ++i)
+   put_bh(bh[i]);
+   kfree(bh);
+   req->length = length;
+   req->index

[PATCH 2/5] Squashfs: refactor page_actor

2017-09-22 Thread Daniel Rosenberg
From: Adrien Schildknecht <adri...@google.com>

This patch essentially does 3 things:
  1/ Always use an array of pages to store the data instead of a mix of
 buffers and pages.
  2/ It is now possible to have 'holes' in a page actor, i.e. NULL
 pages in the array.
 When reading a block (default 128K), squashfs tries to grab all
 the pages covering this block. If a single page is up-to-date or
 locked, it falls back to using an intermediate buffer to do the
 read and then copy the pages in the actor. Allowing holes in the
 page actor removes the need for this intermediate buffer.
  3/ Refactor the wrappers to share code that deals with page actors.

Signed-off-by: Adrien Schildknecht <adri...@google.com>
Signed-off-by: Daniel Rosenberg <dro...@google.com>
---
 fs/squashfs/cache.c  |  73 +++---
 fs/squashfs/decompressor.c   |  55 +++---
 fs/squashfs/file_direct.c|   4 +-
 fs/squashfs/lz4_wrapper.c|  29 +--
 fs/squashfs/lzo_wrapper.c|  40 ++
 fs/squashfs/page_actor.c | 175 ---
 fs/squashfs/page_actor.h |  52 +
 fs/squashfs/squashfs_fs_sb.h |   2 +-
 fs/squashfs/xz_wrapper.c |  15 +++-
 fs/squashfs/zlib_wrapper.c   |  14 +++-
 10 files changed, 251 insertions(+), 208 deletions(-)

diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 23813c078cc9..05e42441d106 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -209,17 +209,14 @@ void squashfs_cache_put(struct squashfs_cache_entry 
*entry)
  */
 void squashfs_cache_delete(struct squashfs_cache *cache)
 {
-   int i, j;
+   int i;
 
if (cache == NULL)
return;
 
for (i = 0; i < cache->entries; i++) {
-   if (cache->entry[i].data) {
-   for (j = 0; j < cache->pages; j++)
-   kfree(cache->entry[i].data[j]);
-   kfree(cache->entry[i].data);
-   }
+   if (cache->entry[i].page)
+   free_page_array(cache->entry[i].page, cache->pages);
kfree(cache->entry[i].actor);
}
 
@@ -236,7 +233,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
 struct squashfs_cache *squashfs_cache_init(char *name, int entries,
int block_size)
 {
-   int i, j;
+   int i;
struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
 
if (cache == NULL) {
@@ -268,22 +265,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, 
int entries,
init_waitqueue_head(&cache->entry[i].wait_queue);
entry->cache = cache;
entry->block = SQUASHFS_INVALID_BLK;
-   entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL);
-   if (entry->data == NULL) {
+   entry->page = alloc_page_array(cache->pages, GFP_KERNEL);
+   if (!entry->page) {
ERROR("Failed to allocate %s cache entry\n", name);
goto cleanup;
}
-
-   for (j = 0; j < cache->pages; j++) {
-   entry->data[j] = kmalloc(PAGE_SIZE, GFP_KERNEL);
-   if (entry->data[j] == NULL) {
-   ERROR("Failed to allocate %s buffer\n", name);
-   goto cleanup;
-   }
-   }
-
-   entry->actor = squashfs_page_actor_init(entry->data,
-   cache->pages, 0);
+   entry->actor = squashfs_page_actor_init(entry->page,
+   cache->pages, 0, NULL);
if (entry->actor == NULL) {
ERROR("Failed to allocate %s cache entry\n", name);
goto cleanup;
@@ -314,18 +302,20 @@ int squashfs_copy_data(void *buffer, struct 
squashfs_cache_entry *entry,
return min(length, entry->length - offset);
 
while (offset < entry->length) {
-   void *buff = entry->data[offset / PAGE_SIZE]
-   + (offset % PAGE_SIZE);
+   void *buff = kmap_atomic(entry->page[offset / PAGE_SIZE])
++ (offset % PAGE_SIZE);
int bytes = min_t(int, entry->length - offset,
PAGE_SIZE - (offset % PAGE_SIZE));
 
if (bytes >= remaining) {
memcpy(buffer, buff, remaining);
+   kunmap_atomic(buff);
remaining = 0;
break;
}
 
memcpy(buffer, buff, bytes);
+   kunmap_atomic(buff);
buffer += bytes;
remaining -= bytes;
   

[PATCH 5/5] Squashfs: optimize reading uncompressed data

2017-09-22 Thread Daniel Rosenberg
From: Adrien Schildknecht <adri...@google.com>

When dealing with uncompressed data, there is no need to read a whole
block (default 128K) to get the desired page: the pages are
independent of each other.

This patch changes the readpages logic so that reading uncompressed
data only reads the number of pages advised by the readahead algorithm.

Moreover, if the page actor contains holes (i.e. pages that are already
up-to-date), squashfs skips the buffer_head associated to those pages.

This patch greatly improves the performance of random reads for
uncompressed files because squashfs only reads what is needed. It also
reduces the number of unnecessary reads.

Signed-off-by: Adrien Schildknecht <adri...@google.com>
Signed-off-by: Daniel Rosenberg <dro...@google.com>
---
 fs/squashfs/block.c   | 25 +
 fs/squashfs/file_direct.c | 37 ++---
 2 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 252dfc82ae72..37658b6e83ee 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -207,6 +207,22 @@ static void squashfs_bio_end_io(struct bio *bio)
kfree(bio_req);
 }
 
+static int bh_is_optional(struct squashfs_read_request *req, int idx)
+{
+   int start_idx, end_idx;
+   struct squashfs_sb_info *msblk = req->sb->s_fs_info;
+
+   start_idx = (idx * msblk->devblksize - req->offset) >> PAGE_SHIFT;
+   end_idx = ((idx + 1) * msblk->devblksize - req->offset + 1) >> 
PAGE_SHIFT;
+   if (start_idx >= req->output->pages)
+   return 1;
+   if (start_idx < 0)
+   start_idx = end_idx;
+   if (end_idx >= req->output->pages)
+   end_idx = start_idx;
+   return !req->output->page[start_idx] && !req->output->page[end_idx];
+}
+
 static int actor_getblks(struct squashfs_read_request *req, u64 block)
 {
int i;
@@ -216,6 +232,15 @@ static int actor_getblks(struct squashfs_read_request 
*req, u64 block)
return -ENOMEM;
 
for (i = 0; i < req->nr_buffers; ++i) {
+   /*
+* When dealing with an uncompressed block, the actor may
+* contains NULL pages. There's no need to read the buffers
+* associated with these pages.
+*/
+   if (!req->compressed && bh_is_optional(req, i)) {
+   req->bh[i] = NULL;
+   continue;
+   }
req->bh[i] = sb_getblk(req->sb, block + i);
if (!req->bh[i]) {
while (--i) {
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index a978811de327..dc87f77ce11e 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -111,15 +111,38 @@ int squashfs_readpages_block(struct page *target_page,
struct squashfs_page_actor *actor;
struct inode *inode = mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-   int file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+   int start_index, end_index, file_end, actor_pages, res;
int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
-   int start_index = page_index & ~mask;
-   int end_index = start_index | mask;
-   int actor_pages, res;
 
-   if (end_index > file_end)
-   end_index = file_end;
-   actor_pages = end_index - start_index + 1;
+   /*
+* If readpage() is called on an uncompressed datablock, we can just
+* read the pages instead of fetching the whole block.
+* This greatly improves the performance when a process keep doing
+* random reads because we only fetch the necessary data.
+* The readahead algorithm will take care of doing speculative reads
+* if necessary.
+* We can't read more than 1 block even if readahead provides use more
+* pages because we don't know yet if the next block is compressed or
+* not.
+*/
+   if (bsize && !SQUASHFS_COMPRESSED_BLOCK(bsize)) {
+   u64 block_end = block + msblk->block_size;
+
+   block += (page_index & mask) * PAGE_SIZE;
+   actor_pages = (block_end - block) / PAGE_SIZE;
+   if (*nr_pages < actor_pages)
+   actor_pages = *nr_pages;
+   start_index = page_index;
+   bsize = min_t(int, bsize, (PAGE_SIZE * actor_pages)
+ | SQUASHFS_COMPRESSED_BIT_BLOCK);
+   } else {
+   file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+   start_index = page_index & ~mask;
+   end_index = start_index | mask;
+   if (end_index > file_end)

[PATCH 0/5] Squashfs Whitelist and Compression Threshold

2017-09-22 Thread Daniel Rosenberg
These patches contain several optimizations to Squashfs. ll_rw_block is
replaced with submit_bio. readpages is implemented to support asynchronous
readahead. Uncompressed file reads are optimized, no longer requiring the
entire block to be read if the block doesn't need to be decompressed, which
greatly improves random read speeds in uncompressed files. There is a separate
set of patches to the userspace tools that adds a whitelist of files not to
compress, and the ability to set a compression threshold to avoid compressing
files where compression offers no significant advantage.

Adrien Schildknecht (5):
  Squashfs: remove the FILE_CACHE option
  Squashfs: refactor page_actor
  Squashfs: replace buffer_head with BIO
  Squashfs: implement .readpages()
  Squashfs: optimize reading uncompressed data

 fs/squashfs/Kconfig  |  28 ---
 fs/squashfs/Makefile |   3 +-
 fs/squashfs/block.c  | 547 ---
 fs/squashfs/cache.c  |  73 +++---
 fs/squashfs/decompressor.c   |  55 +++--
 fs/squashfs/file.c   | 137 ---
 fs/squashfs/file_cache.c |  38 ---
 fs/squashfs/file_direct.c| 240 +--
 fs/squashfs/lz4_wrapper.c|  29 +--
 fs/squashfs/lzo_wrapper.c|  40 +---
 fs/squashfs/page_actor.c | 175 +-
 fs/squashfs/page_actor.h |  84 +++
 fs/squashfs/squashfs.h   |   9 +-
 fs/squashfs/squashfs_fs_sb.h |   2 +-
 fs/squashfs/super.c  |   7 +
 fs/squashfs/xz_wrapper.c |  15 +-
 fs/squashfs/zlib_wrapper.c   |  14 +-
 17 files changed, 876 insertions(+), 620 deletions(-)
 delete mode 100644 fs/squashfs/file_cache.c

-- 
2.14.1.821.g8fa685d3b7-goog



[PATCH 5/5] Squashfs: optimize reading uncompressed data

2017-09-22 Thread Daniel Rosenberg
From: Adrien Schildknecht 

When dealing with uncompressed data, there is no need to read a whole
block (default 128K) to get the desired page: the pages are
independent of each other.

This patch changes the readpages logic so that reading uncompressed
data only reads the number of pages advised by the readahead algorithm.

Moreover, if the page actor contains holes (i.e. pages that are already
up-to-date), squashfs skips the buffer_head associated to those pages.

This patch greatly improves the performance of random reads for
uncompressed files because squashfs only reads what is needed. It also
reduces the number of unnecessary reads.

Signed-off-by: Adrien Schildknecht 
Signed-off-by: Daniel Rosenberg 
---
 fs/squashfs/block.c   | 25 +
 fs/squashfs/file_direct.c | 37 ++---
 2 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 252dfc82ae72..37658b6e83ee 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -207,6 +207,22 @@ static void squashfs_bio_end_io(struct bio *bio)
kfree(bio_req);
 }
 
+static int bh_is_optional(struct squashfs_read_request *req, int idx)
+{
+   int start_idx, end_idx;
+   struct squashfs_sb_info *msblk = req->sb->s_fs_info;
+
+   start_idx = (idx * msblk->devblksize - req->offset) >> PAGE_SHIFT;
+   end_idx = ((idx + 1) * msblk->devblksize - req->offset + 1) >> 
PAGE_SHIFT;
+   if (start_idx >= req->output->pages)
+   return 1;
+   if (start_idx < 0)
+   start_idx = end_idx;
+   if (end_idx >= req->output->pages)
+   end_idx = start_idx;
+   return !req->output->page[start_idx] && !req->output->page[end_idx];
+}
+
 static int actor_getblks(struct squashfs_read_request *req, u64 block)
 {
int i;
@@ -216,6 +232,15 @@ static int actor_getblks(struct squashfs_read_request 
*req, u64 block)
return -ENOMEM;
 
for (i = 0; i < req->nr_buffers; ++i) {
+   /*
+* When dealing with an uncompressed block, the actor may
+* contains NULL pages. There's no need to read the buffers
+* associated with these pages.
+*/
+   if (!req->compressed && bh_is_optional(req, i)) {
+   req->bh[i] = NULL;
+   continue;
+   }
req->bh[i] = sb_getblk(req->sb, block + i);
if (!req->bh[i]) {
while (--i) {
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index a978811de327..dc87f77ce11e 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -111,15 +111,38 @@ int squashfs_readpages_block(struct page *target_page,
struct squashfs_page_actor *actor;
struct inode *inode = mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
-   int file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+   int start_index, end_index, file_end, actor_pages, res;
int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
-   int start_index = page_index & ~mask;
-   int end_index = start_index | mask;
-   int actor_pages, res;
 
-   if (end_index > file_end)
-   end_index = file_end;
-   actor_pages = end_index - start_index + 1;
+   /*
+* If readpage() is called on an uncompressed datablock, we can just
+* read the pages instead of fetching the whole block.
+* This greatly improves the performance when a process keep doing
+* random reads because we only fetch the necessary data.
+* The readahead algorithm will take care of doing speculative reads
+* if necessary.
+* We can't read more than 1 block even if readahead provides use more
+* pages because we don't know yet if the next block is compressed or
+* not.
+*/
+   if (bsize && !SQUASHFS_COMPRESSED_BLOCK(bsize)) {
+   u64 block_end = block + msblk->block_size;
+
+   block += (page_index & mask) * PAGE_SIZE;
+   actor_pages = (block_end - block) / PAGE_SIZE;
+   if (*nr_pages < actor_pages)
+   actor_pages = *nr_pages;
+   start_index = page_index;
+   bsize = min_t(int, bsize, (PAGE_SIZE * actor_pages)
+ | SQUASHFS_COMPRESSED_BIT_BLOCK);
+   } else {
+   file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+   start_index = page_index & ~mask;
+   end_index = start_index | mask;
+   if (end_index > file_end)
+   end_index = file_end;
+   actor_pages = end_index - start_i

[PATCH 0/5] Squashfs Whitelist and Compression Threshold

2017-09-22 Thread Daniel Rosenberg
These patches contain several optimizations to Squashfs. ll_rw_block is
replaced with submit_bio. readpages is implemented to support asynchronous
readahead. Uncompressed file reads are optimized, no longer requiring the
entire block to be read if the block doesn't need to be decompressed, which
greatly improves random read speeds in uncompressed files. There is a separate
set of patches to the userspace tools adding a whitelist of files to not
compress, and an ability to set a compression threshold to avoid compressing
files where there isn't a significant advantage made by compressing.

Adrien Schildknecht (5):
  Squashfs: remove the FILE_CACHE option
  Squashfs: refactor page_actor
  Squashfs: replace buffer_head with BIO
  Squashfs: implement .readpages()
  Squashfs: optimize reading uncompressed data

 fs/squashfs/Kconfig  |  28 ---
 fs/squashfs/Makefile |   3 +-
 fs/squashfs/block.c  | 547 ---
 fs/squashfs/cache.c  |  73 +++---
 fs/squashfs/decompressor.c   |  55 +++--
 fs/squashfs/file.c   | 137 ---
 fs/squashfs/file_cache.c |  38 ---
 fs/squashfs/file_direct.c| 240 +--
 fs/squashfs/lz4_wrapper.c|  29 +--
 fs/squashfs/lzo_wrapper.c|  40 +---
 fs/squashfs/page_actor.c | 175 +-
 fs/squashfs/page_actor.h |  84 +++
 fs/squashfs/squashfs.h   |   9 +-
 fs/squashfs/squashfs_fs_sb.h |   2 +-
 fs/squashfs/super.c  |   7 +
 fs/squashfs/xz_wrapper.c |  15 +-
 fs/squashfs/zlib_wrapper.c   |  14 +-
 17 files changed, 876 insertions(+), 620 deletions(-)
 delete mode 100644 fs/squashfs/file_cache.c

-- 
2.14.1.821.g8fa685d3b7-goog



[PATCH 2/5] Squashfs: refactor page_actor

2017-09-22 Thread Daniel Rosenberg
From: Adrien Schildknecht 

This patch essentially does 3 things:
  1/ Always use an array of pages to store the data instead of a mix of
 buffers and pages.
  2/ It is now possible to have 'holes' in a page actor, i.e. NULL
 pages in the array.
 When reading a block (default 128K), squashfs tries to grab all
 the pages covering this block. If a single page is up-to-date or
 locked, it falls back to using an intermediate buffer to do the
 read and then copies the pages in the actor. Allowing holes in the
 page actor removes the need for this intermediate buffer.
  3/ Refactor the wrappers to share code that deals with page actors.

Signed-off-by: Adrien Schildknecht 
Signed-off-by: Daniel Rosenberg 
---
 fs/squashfs/cache.c  |  73 +++---
 fs/squashfs/decompressor.c   |  55 +++---
 fs/squashfs/file_direct.c|   4 +-
 fs/squashfs/lz4_wrapper.c|  29 +--
 fs/squashfs/lzo_wrapper.c|  40 ++
 fs/squashfs/page_actor.c | 175 ---
 fs/squashfs/page_actor.h |  52 +
 fs/squashfs/squashfs_fs_sb.h |   2 +-
 fs/squashfs/xz_wrapper.c |  15 +++-
 fs/squashfs/zlib_wrapper.c   |  14 +++-
 10 files changed, 251 insertions(+), 208 deletions(-)

diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 23813c078cc9..05e42441d106 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -209,17 +209,14 @@ void squashfs_cache_put(struct squashfs_cache_entry *entry)
  */
 void squashfs_cache_delete(struct squashfs_cache *cache)
 {
-   int i, j;
+   int i;
 
if (cache == NULL)
return;
 
for (i = 0; i < cache->entries; i++) {
-   if (cache->entry[i].data) {
-   for (j = 0; j < cache->pages; j++)
-   kfree(cache->entry[i].data[j]);
-   kfree(cache->entry[i].data);
-   }
+   if (cache->entry[i].page)
+   free_page_array(cache->entry[i].page, cache->pages);
kfree(cache->entry[i].actor);
}
 
@@ -236,7 +233,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
 struct squashfs_cache *squashfs_cache_init(char *name, int entries,
int block_size)
 {
-   int i, j;
+   int i;
struct squashfs_cache *cache = kzalloc(sizeof(*cache), GFP_KERNEL);
 
if (cache == NULL) {
@@ -268,22 +265,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
init_waitqueue_head(&cache->entry[i].wait_queue);
entry->cache = cache;
entry->block = SQUASHFS_INVALID_BLK;
-   entry->data = kcalloc(cache->pages, sizeof(void *), GFP_KERNEL);
-   if (entry->data == NULL) {
+   entry->page = alloc_page_array(cache->pages, GFP_KERNEL);
+   if (!entry->page) {
ERROR("Failed to allocate %s cache entry\n", name);
goto cleanup;
}
-
-   for (j = 0; j < cache->pages; j++) {
-   entry->data[j] = kmalloc(PAGE_SIZE, GFP_KERNEL);
-   if (entry->data[j] == NULL) {
-   ERROR("Failed to allocate %s buffer\n", name);
-   goto cleanup;
-   }
-   }
-
-   entry->actor = squashfs_page_actor_init(entry->data,
-   cache->pages, 0);
+   entry->actor = squashfs_page_actor_init(entry->page,
+   cache->pages, 0, NULL);
if (entry->actor == NULL) {
ERROR("Failed to allocate %s cache entry\n", name);
goto cleanup;
@@ -314,18 +302,20 @@ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry,
return min(length, entry->length - offset);
 
while (offset < entry->length) {
-   void *buff = entry->data[offset / PAGE_SIZE]
-   + (offset % PAGE_SIZE);
+   void *buff = kmap_atomic(entry->page[offset / PAGE_SIZE])
++ (offset % PAGE_SIZE);
int bytes = min_t(int, entry->length - offset,
PAGE_SIZE - (offset % PAGE_SIZE));
 
if (bytes >= remaining) {
memcpy(buffer, buff, remaining);
+   kunmap_atomic(buff);
remaining = 0;
break;
}
 
memcpy(buffer, buff, bytes);
+   kunmap_atomic(buff);
buffer += bytes;
remaining -= bytes;
offset += bytes;
@@ -416,43 +406,38 @@ struct squashfs_cache_entry

[PATCH 1/5] Squashfs: remove the FILE_CACHE option

2017-09-22 Thread Daniel Rosenberg
From: Adrien Schildknecht <adri...@google.com>

FILE_DIRECT is working fine and offers faster results and lower memory
footprint.

Removing FILE_CACHE makes our life easier because we don't have to
maintain two different functions that do the same thing.

Signed-off-by: Adrien Schildknecht <adri...@google.com>
Signed-off-by: Daniel Rosenberg <dro...@google.com>
---
 fs/squashfs/Kconfig  | 28 
 fs/squashfs/Makefile |  3 +--
 fs/squashfs/file_cache.c | 38 --
 fs/squashfs/page_actor.h | 42 +-
 4 files changed, 2 insertions(+), 109 deletions(-)
 delete mode 100644 fs/squashfs/file_cache.c

diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 1adb3346b9d6..6c81bf620067 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -25,34 +25,6 @@ config SQUASHFS
 
  If unsure, say N.
 
-choice
-   prompt "File decompression options"
-   depends on SQUASHFS
-   help
- Squashfs now supports two options for decompressing file
- data.  Traditionally Squashfs has decompressed into an
- intermediate buffer and then memcopied it into the page cache.
- Squashfs now supports the ability to decompress directly into
- the page cache.
-
- If unsure, select "Decompress file data into an intermediate buffer"
-
-config SQUASHFS_FILE_CACHE
-   bool "Decompress file data into an intermediate buffer"
-   help
- Decompress file data into an intermediate buffer and then
- memcopy it into the page cache.
-
-config SQUASHFS_FILE_DIRECT
-   bool "Decompress files directly into the page cache"
-   help
- Directly decompress file data into the page cache.
- Doing so can significantly improve performance because
- it eliminates a memcpy and it also removes the lock contention
- on the single buffer.
-
-endchoice
-
 choice
prompt "Decompressor parallelisation options"
depends on SQUASHFS
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 6655631c53ae..225330ab7723 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,8 +5,7 @@
 obj-$(CONFIG_SQUASHFS) += squashfs.o
 squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
 squashfs-y += namei.o super.o symlink.o decompressor.o
-squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
-squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
+squashfs-y += file_direct.o page_actor.o
 squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
 squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
 squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
deleted file mode 100644
index f2310d2a2019..
--- a/fs/squashfs/file_cache.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2013
- * Phillip Lougher <phil...@squashfs.org.uk>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include "squashfs_fs.h"
-#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
-#include "squashfs.h"
-
-/* Read separately compressed datablock and memcopy into page cache */
-int squashfs_readpage_block(struct page *page, u64 block, int bsize)
-{
-   struct inode *i = page->mapping->host;
-   struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
-   block, bsize);
-   int res = buffer->error;
-
-   if (res)
-   ERROR("Unable to read page, block %llx, size %x\n", block,
-   bsize);
-   else
-   squashfs_copy_cache(page, buffer, buffer->length, 0);
-
-   squashfs_cache_put(buffer);
-   return res;
-}
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 98537eab27e2..d2df0544e0df 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -8,46 +8,6 @@
  * the COPYING file in the top-level directory.
  */
 
-#ifndef CONFIG_SQUASHFS_FILE_DIRECT
-struct squashfs_page_actor {
-   void**page;
-   int pages;
-   int length;
-   int next_page;
-};
-
-static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
-   int pages, int length)
-{
-   struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
-
-   if (actor == NULL)
-   return NULL;
-
-   actor->length = length ? : pages * PAGE_SIZE;
-   actor->page = page;
-   actor->pages = pages;
-   actor->next_page = 0;
-   return actor;
-}
-
-static inline void *squashfs_first_page

[PATCH 1/5] Squashfs: remove the FILE_CACHE option

2017-09-22 Thread Daniel Rosenberg
From: Adrien Schildknecht 

FILE_DIRECT is working fine and offers faster results and lower memory
footprint.

Removing FILE_CACHE makes our life easier because we don't have to
maintain two different functions that do the same thing.

Signed-off-by: Adrien Schildknecht 
Signed-off-by: Daniel Rosenberg 
---
 fs/squashfs/Kconfig  | 28 
 fs/squashfs/Makefile |  3 +--
 fs/squashfs/file_cache.c | 38 --
 fs/squashfs/page_actor.h | 42 +-
 4 files changed, 2 insertions(+), 109 deletions(-)
 delete mode 100644 fs/squashfs/file_cache.c

diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 1adb3346b9d6..6c81bf620067 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -25,34 +25,6 @@ config SQUASHFS
 
  If unsure, say N.
 
-choice
-   prompt "File decompression options"
-   depends on SQUASHFS
-   help
- Squashfs now supports two options for decompressing file
- data.  Traditionally Squashfs has decompressed into an
- intermediate buffer and then memcopied it into the page cache.
- Squashfs now supports the ability to decompress directly into
- the page cache.
-
- If unsure, select "Decompress file data into an intermediate buffer"
-
-config SQUASHFS_FILE_CACHE
-   bool "Decompress file data into an intermediate buffer"
-   help
- Decompress file data into an intermediate buffer and then
- memcopy it into the page cache.
-
-config SQUASHFS_FILE_DIRECT
-   bool "Decompress files directly into the page cache"
-   help
- Directly decompress file data into the page cache.
- Doing so can significantly improve performance because
- it eliminates a memcpy and it also removes the lock contention
- on the single buffer.
-
-endchoice
-
 choice
prompt "Decompressor parallelisation options"
depends on SQUASHFS
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index 6655631c53ae..225330ab7723 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -5,8 +5,7 @@
 obj-$(CONFIG_SQUASHFS) += squashfs.o
 squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
 squashfs-y += namei.o super.o symlink.o decompressor.o
-squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o
-squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o
+squashfs-y += file_direct.o page_actor.o
 squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o
 squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o
 squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c
deleted file mode 100644
index f2310d2a2019..
--- a/fs/squashfs/file_cache.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2013
- * Phillip Lougher 
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include "squashfs_fs.h"
-#include "squashfs_fs_sb.h"
-#include "squashfs_fs_i.h"
-#include "squashfs.h"
-
-/* Read separately compressed datablock and memcopy into page cache */
-int squashfs_readpage_block(struct page *page, u64 block, int bsize)
-{
-   struct inode *i = page->mapping->host;
-   struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb,
-   block, bsize);
-   int res = buffer->error;
-
-   if (res)
-   ERROR("Unable to read page, block %llx, size %x\n", block,
-   bsize);
-   else
-   squashfs_copy_cache(page, buffer, buffer->length, 0);
-
-   squashfs_cache_put(buffer);
-   return res;
-}
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 98537eab27e2..d2df0544e0df 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -8,46 +8,6 @@
  * the COPYING file in the top-level directory.
  */
 
-#ifndef CONFIG_SQUASHFS_FILE_DIRECT
-struct squashfs_page_actor {
-   void**page;
-   int pages;
-   int length;
-   int next_page;
-};
-
-static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
-   int pages, int length)
-{
-   struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL);
-
-   if (actor == NULL)
-   return NULL;
-
-   actor->length = length ? : pages * PAGE_SIZE;
-   actor->page = page;
-   actor->pages = pages;
-   actor->next_page = 0;
-   return actor;
-}
-
-static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
-{
-   actor->next_page = 1;
-   return actor->page[0];
-}
-