[PATCH 12/20] btrfs-progs: cmds-check.c: introduce __create_inode_item

2017-02-28 Thread Su Yue
Introduce '__create_inode_item' to create and insert an inode item.
Modify the original 'create_inode_item' to call it.
Create 'create_inode_item_lowmem', which also calls it.
The patch is for further lowmem repair.

Signed-off-by: Su Yue 
---
 cmds-check.c | 83 ++--
 1 file changed, 53 insertions(+), 30 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index bda0849b..256bfbc9 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -2586,13 +2586,55 @@ static int delete_dir_index(struct btrfs_root *root,
return ret;
 }
 
+static int __create_inode_item(struct btrfs_trans_handle *trans,
+  struct btrfs_root *root, u64 ino, u64 size,
+  u64 nbytes, u64 nlink, u32 mode)
+{
+   struct btrfs_inode_item ii;
+   time_t now = time(NULL);
+   int ret;
+
+   btrfs_set_stack_inode_size(&ii, size);
+   btrfs_set_stack_inode_nbytes(&ii, nbytes);
+   btrfs_set_stack_inode_nlink(&ii, nlink);
+   btrfs_set_stack_inode_mode(&ii, mode);
+   btrfs_set_stack_inode_generation(&ii, trans->transid);
+   btrfs_set_stack_timespec_nsec(&ii.atime, 0);
+   btrfs_set_stack_timespec_sec(&ii.ctime, now);
+   btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
+   btrfs_set_stack_timespec_sec(&ii.mtime, now);
+   btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
+   btrfs_set_stack_timespec_sec(&ii.otime, 0);
+   btrfs_set_stack_timespec_nsec(&ii.otime, 0);
+
+   ret = btrfs_insert_inode(trans, root, ino, &ii);
+   ASSERT(!ret);
+
+   warning("root %llu inode %llu recreating inode item, this may "
+   "be incomplete, please check permissions and content after "
+   "the fsck completes.\n", (unsigned long long)root->objectid,
+   (unsigned long long)ino);
+
+   return 0;
+}
+
+static int create_inode_item_lowmem(struct btrfs_trans_handle *trans,
+   struct btrfs_root *root, u64 ino,
+   u8 filetype)
+{
+   u32 mode = (filetype == BTRFS_FT_DIR ? S_IFDIR : S_IFREG) | 0755;
+
+   return __create_inode_item(trans, root, ino, 0, 0, 0, mode);
+}
+
 static int create_inode_item(struct btrfs_root *root,
 struct inode_record *rec,
 struct inode_backref *backref, int root_dir)
 {
struct btrfs_trans_handle *trans;
-   struct btrfs_inode_item inode_item;
-   time_t now = time(NULL);
+   u64 nlink = 0;
+   u32 mode = 0;
+   u64 size = 0;
int ret;
 
trans = btrfs_start_transaction(root, 1);
@@ -2601,18 +2643,7 @@ static int create_inode_item(struct btrfs_root *root,
return ret;
}
 
-   fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
-   "be incomplete, please check permissions and content after "
-   "the fsck completes.\n", (unsigned long long)root->objectid,
-   (unsigned long long)rec->ino);
-
-   memset(&inode_item, 0, sizeof(inode_item));
-   btrfs_set_stack_inode_generation(&inode_item, trans->transid);
-   if (root_dir)
-   btrfs_set_stack_inode_nlink(&inode_item, 1);
-   else
-   btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
-   btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
+   nlink = root_dir ? 1 : rec->found_link;
if (rec->found_dir_item) {
if (rec->found_file_extent)
fprintf(stderr, "root %llu inode %llu has both a dir "
@@ -2620,23 +2651,15 @@ static int create_inode_item(struct btrfs_root *root,
"regular file so setting it as a directory\n",
(unsigned long long)root->objectid,
(unsigned long long)rec->ino);
-   btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
-   btrfs_set_stack_inode_size(&inode_item, rec->found_size);
+   mode = S_IFDIR | 0755;
+   size = rec->found_size;
} else if (!rec->found_dir_item) {
-   btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
-   btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
-   }
-   btrfs_set_stack_timespec_sec(&inode_item.atime, now);
-   btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
-   btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
-   btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
-   btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
-   btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
-   btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
-   btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
-
-   ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
-   BUG_ON(ret);
+   size = rec->extent_end;
+   mode =  S_IFREG | 0755;
+   }

Re: [PATCH 1/2] Btrfs: fix unexpected file hole after disk errors

2017-02-28 Thread Qu Wenruo



At 03/01/2017 09:04 AM, Liu Bo wrote:

Btrfs creates hole extents to cover any unwritten section right before
doing buffer writes after commit 3ac0d7b96a26 ("btrfs: Change the expanding
write sequence to fix snapshot related bug.").

However, that takes the start position of the buffered write to compare
against the current EOF, hole extents would be created only if (EOF <
start).

If the EOF is at the middle of the buffered write, no hole extents will be
created and a file hole without a hole extent is left in this file.

This bug was revealed by generic/019 in fstests.  'fsstress' in this test
may create the above situation and the test then fails all requests
including writes, so the buffer write which is supposed to cover the
hole (without the hole extent) couldn't make it on disk.  Running fsck
against such btrfs ends up with detecting file extent holes.

Things could be more serious, some stale data would be exposed to
userspace if files with this kind of hole are truncated to a position of
the hole, because the on-disk inode size is beyond the last extent in the
file.

This fixes the bug by comparing the end position against the EOF.

Signed-off-by: Liu Bo 


Patch looks good to me.
Reviewed-by: Qu Wenruo 


---
 fs/btrfs/file.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b5c5da2..0be837b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1861,11 +1861,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
pos = iocb->ki_pos;
count = iov_iter_count(from);
start_pos = round_down(pos, fs_info->sectorsize);
+   end_pos = round_up(pos + count, fs_info->sectorsize);
oldsize = i_size_read(inode);
-   if (start_pos > oldsize) {
+   if (end_pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */


The comment still makes sense here, but it would be better to explain
why the hole is inserted to cover the whole write range (in case the
write fails).


Thanks,
Qu


-   end_pos = round_up(pos + count,
-  fs_info->sectorsize);
err = btrfs_cont_expand(inode, oldsize, end_pos);
if (err) {
inode_unlock(inode);




--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/2] Btrfs: fix unexpected file hole after disk errors

2017-02-28 Thread Qu Wenruo

It seems that my previous mail didn't reach the mailing list,
so I'm sending it again.

At 03/01/2017 09:04 AM, Liu Bo wrote:

Btrfs creates hole extents to cover any unwritten section right before
doing buffer writes after commit 3ac0d7b96a26 ("btrfs: Change the expanding
write sequence to fix snapshot related bug.").

However, that takes the start position of the buffered write to compare
against the current EOF, hole extents would be created only if (EOF <
start).

If the EOF is at the middle of the buffered write, no hole extents will be
created and a file hole without a hole extent is left in this file.

This bug was revealed by generic/019 in fstests.  'fsstress' in this test
may create the above situation and the test then fails all requests
including writes, so the buffer write which is supposed to cover the
hole (without the hole extent) couldn't make it on disk.  Running fsck
against such btrfs ends up with detecting file extent holes.

Things could be more serious, some stale data would be exposed to
userspace if files with this kind of hole are truncated to a position of
the hole, because the on-disk inode size is beyond the last extent in the
file.

This fixes the bug by comparing the end position against the EOF.

Signed-off-by: Liu Bo 


Patch looks good to me.
Reviewed-by: Qu Wenruo 


---
 fs/btrfs/file.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b5c5da2..0be837b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1861,11 +1861,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
pos = iocb->ki_pos;
count = iov_iter_count(from);
start_pos = round_down(pos, fs_info->sectorsize);
+   end_pos = round_up(pos + count, fs_info->sectorsize);
oldsize = i_size_read(inode);
-   if (start_pos > oldsize) {
+   if (end_pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */


The comment still makes sense here, but it would be better to explain
why the hole is inserted to cover the whole write range (in case the
write fails).


Thanks,
Qu


-   end_pos = round_up(pos + count,
-  fs_info->sectorsize);
err = btrfs_cont_expand(inode, oldsize, end_pos);
if (err) {
inode_unlock(inode);




--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/20] btrfs-progs: cmds-check.c: inode orphan item repair

2017-02-28 Thread Su Yue
Add a function named 'repair_inode_orphan_item_lowmem'.

Signed-off-by: Su Yue 
---
 cmds-check.c | 50 +-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/cmds-check.c b/cmds-check.c
index f13ce317..fb239968 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -4936,6 +4936,46 @@ out:
 }
 
 /*
+ * repair ORPHAN_ITEM error
+ *
+ * Returns <0  means on error
+ * Returns  0  means successful repair
+ */
+static int repair_inode_orphan_item_lowmem(struct btrfs_root *root,
+  struct btrfs_path *path, u64 ino)
+{
+   struct btrfs_trans_handle *trans;
+   struct btrfs_key research_key;
+   int ret;
+   int ret2;
+
+   btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
+   btrfs_release_path(path);
+   trans = btrfs_start_transaction(root, 1);
+   if (IS_ERR(trans)) {
+   ret = PTR_ERR(trans);
+   goto out;
+   }
+
+   ret = btrfs_add_orphan_item(trans, root, path, ino);
+   if (ret)
+   goto out;
+
+   printf("added inode %llu orphan item root %llu", ino,
+  root->root_key.objectid);
+
+   btrfs_commit_transaction(trans, root);
+out:
+   if (ret < 0)
+   error("failed to add inode %llu orphan item root %llu due to 
%s",
+  ino, root->root_key.objectid, strerror(-ret));
+   btrfs_release_path(path);
+   ret2 = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
+
+   return ret2 < 0 ? ret2 : ret;
+}
+
+/*
  * Check INODE_ITEM and related ITEMs (the same inode number)
  * 1. check link count
  * 2. check inode ref/extref
@@ -5088,7 +5128,15 @@ out:
error("root %llu INODE[%llu] nlink(%llu) not equal to 
inode_refs(%llu)",
  root->objectid, inode_id, nlink, refs);
} else if (!nlink) {
-   err |= ORPHAN_ITEM;
+   if (repair)
+   ret = repair_inode_orphan_item_lowmem(root,
+ path,
+ inode_id);
+   if (!repair || ret) {
+   err |= ORPHAN_ITEM;
+   error("root %llu INODE[%llu] is orphan item",
+ root->objectid, inode_id);
+   }
}
 
if (!nbytes && !no_holes && extent_end < isize) {
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 11/20] btrfs-progs: inode.c: alter btrfs_add_link

2017-02-28 Thread Su Yue
Add an argument 'ignore_existed' to btrfs_add_link.
If ignore_existed=1, continue adding the link even when the related
dir index/item or inode ref already exists.

This patch is for further repair.

Signed-off-by: Su Yue 
---
 cmds-check.c   |  6 +++---
 convert/main.c |  2 +-
 ctree.h|  2 +-
 inode.c| 46 ++
 4 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 685f4f5d..bda0849b 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -2855,7 +2855,7 @@ static int reset_nlink(struct btrfs_trans_handle *trans,
list_for_each_entry(backref, &rec->backrefs, list) {
ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
 backref->name, backref->namelen,
-backref->filetype, &backref->index, 1);
+backref->filetype, &backref->index, 1, 0);
if (ret < 0)
goto out;
}
@@ -2947,7 +2947,7 @@ static int repair_inode_nlinks(struct btrfs_trans_handle 
*trans,
goto out;
}
ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
-namebuf, namelen, type, NULL, 1);
+namebuf, namelen, type, NULL, 1, 0);
/*
 * Add ".INO" suffix several times to handle case where
 * "FILENAME.INO" is already taken by another file.
@@ -2966,7 +2966,7 @@ static int repair_inode_nlinks(struct btrfs_trans_handle 
*trans,
namelen += count_digits(rec->ino) + 1;
ret = btrfs_add_link(trans, root, rec->ino,
 lost_found_ino, namebuf,
-namelen, type, NULL, 1);
+namelen, type, NULL, 1, 0);
}
if (ret < 0) {
fprintf(stderr,
diff --git a/convert/main.c b/convert/main.c
index 8d9f29fa..7607bec1 100644
--- a/convert/main.c
+++ b/convert/main.c
@@ -998,7 +998,7 @@ static int create_image(struct btrfs_root *root,
if (ret < 0)
goto out;
ret = btrfs_add_link(trans, root, ino, BTRFS_FIRST_FREE_OBJECTID, name,
-strlen(name), BTRFS_FT_REG_FILE, NULL, 1);
+strlen(name), BTRFS_FT_REG_FILE, NULL, 1, 0);
if (ret < 0)
goto out;
 
diff --git a/ctree.h b/ctree.h
index 0c34ae20..a28e36de 100644
--- a/ctree.h
+++ b/ctree.h
@@ -2779,7 +2779,7 @@ int btrfs_change_inode_flags(struct btrfs_trans_handle 
*trans,
 struct btrfs_root *root, u64 ino, u64 flags);
 int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root,
   u64 ino, u64 parent_ino, char *name, int namelen,
-  u8 type, u64 *index, int add_backref);
+  u8 type, u64 *index, int add_backref, int ignore_existed);
 int btrfs_unlink(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 u64 ino, u64 parent_ino, u64 index, const char *name,
 int namelen, int add_orphan);
diff --git a/inode.c b/inode.c
index 991b8ddb..62e8abec 100644
--- a/inode.c
+++ b/inode.c
@@ -161,7 +161,7 @@ out:
  */
 int btrfs_add_link(struct btrfs_trans_handle *trans, struct btrfs_root *root,
   u64 ino, u64 parent_ino, char *name, int namelen,
-  u8 type, u64 *index, int add_backref)
+  u8 type, u64 *index, int add_backref, int ignore_existed)
 {
struct btrfs_path *path;
struct btrfs_key key;
@@ -184,33 +184,38 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, 
struct btrfs_root *root,
}
 
ret = check_dir_conflict(root, name, namelen, parent_ino, ret_index);
-   if (ret < 0)
+   if (ret < 0 && (!ignore_existed || ret != -EEXIST))
goto out;
 
/* Add inode ref */
if (add_backref) {
ret = btrfs_insert_inode_ref(trans, root, name, namelen,
 ino, parent_ino, ret_index);
-   if (ret < 0)
+   if (ret < 0 && (!ignore_existed || ret != -EEXIST))
goto out;
 
-   /* Update nlinks for the inode */
-   key.objectid = ino;
-   key.type = BTRFS_INODE_ITEM_KEY;
-   key.offset = 0;
-   ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
-   if (ret) {
-   if (ret > 0)
-   ret = -ENOENT;
-   goto out;
+   /* do not update nlinks if existed */
+   if (!ret) {
+   /* Update nlinks for the inode */
+   key.objectid = ino;
+   

[PATCH 05/20] btrfs-progs: cmds-check.c: modify check_fs_first_inode

2017-02-28 Thread Su Yue
Modify check_fs_first_inode to check the first inode's inode item and inode ref.

Signed-off-by: Su Yue 
---
 cmds-check.c | 57 ++---
 1 file changed, 42 insertions(+), 15 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 246f4735..892a22ba 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -5169,42 +5169,68 @@ out:
return err;
 }
 
+/*
+ * check first root dir's inode_item, inde_ref
+ *
+ * returns 0 means no error
+ * returns >0 means error
+ * returns <0 means fatal error
+ */
 static int check_fs_first_inode(struct btrfs_root *root, unsigned int ext_ref)
 {
struct btrfs_path path;
struct btrfs_key key;
+   struct btrfs_inode_item *ii;
+   u64 index = 0;
+   u32 mode;
int err = 0;
int ret;
 
-   key.objectid = BTRFS_FIRST_FREE_OBJECTID;
-   key.type = BTRFS_INODE_ITEM_KEY;
-   key.offset = 0;
-
/* For root being dropped, we don't need to check first inode */
if (btrfs_root_refs(&root->root_item) == 0 &&
btrfs_disk_key_objectid(&root->root_item.drop_progress) >=
-   key.objectid)
+   BTRFS_FIRST_FREE_OBJECTID)
return 0;
 
+   /*search first inode item */
+   key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+   key.type = BTRFS_INODE_ITEM_KEY;
+   key.offset = 0;
+
btrfs_init_path(&path);
 
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
-   if (ret < 0)
+   if (ret < 0) {
goto out;
-   if (ret > 0) {
-   ret = 0;
+   } else if (ret > 0) {
err |= INODE_ITEM_MISSING;
-   error("first inode item of root %llu is missing",
- root->objectid);
+   } else {
+   ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
+   struct btrfs_inode_item);
+   mode = btrfs_inode_mode(path.nodes[0], ii);
+   if (imode_to_type(mode) != BTRFS_FT_DIR)
+   err |= INODE_ITEM_MISMATCH;
}
+   btrfs_release_path(&path);
+
+   /* lookup first inode ref */
+   key.offset = BTRFS_FIRST_FREE_OBJECTID;
+   key.type = BTRFS_INODE_REF_KEY;
+
+   ret = find_inode_ref(root, &key, "..", strlen(".."), &index, ext_ref);
+   if (ret < 0)
+   goto out;
+   err |= ret;
 
-   err |= check_inode_item(root, &path, ext_ref);
-   err &= ~LAST_ITEM;
-   if (err && !ret)
-   ret = -EIO;
 out:
btrfs_release_path(&path);
-   return ret;
+   if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
+   error("root dir INODE_ITEM is %s",
+ err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
+   if (err & INODE_REF_MISSING)
+   error("root dir INODE_REF is missing");
+
+   return ret < 0 ? ret : err;
 }
 
 /*
@@ -5232,6 +5258,7 @@ static int check_fs_root_v2(struct btrfs_root *root, 
unsigned int ext_ref)
 * we will just skip it forever.
 */
ret = check_fs_first_inode(root, ext_ref);
+   err |= !!ret;
if (ret < 0)
return ret;
 
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-02-28 Thread Matthew Wilcox
On Tue, Feb 28, 2017 at 05:36:05PM -0600, Goldwyn Rodrigues wrote:
> Find out if the write will trigger a wait due to writeback. If yes,
> return -EAGAIN.
> 
> This introduces a new function filemap_range_has_page() which
> returns true if the file's mapping has a page within the range
> mentioned.

Ugh, this is pretty inefficient.  If that's all you want to know, then
using the radix tree directly will be far more efficient than spinning
up all the pagevec machinery only to discard the pages found.

But what's going to kick these pages out of cache?  Shouldn't we rather
find the pages, kick them out if clean, start writeback if not, and *then*
return -EAGAIN?

So maybe we want to spin up the pagevec machinery after all so we can
do that extra work?

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 10/20] btrfs-progs: dir-item.c: modify btrfs_insert_dir_item

2017-02-28 Thread Su Yue
In the function 'btrfs_insert_dir_item', continue to insert the
dir index if the dir item already exists, and set the ret value
to 0.

This is for further repair.

Signed-off-by: Su Yue 
---
 dir-item.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/dir-item.c b/dir-item.c
index 846fc292..a112ed72 100644
--- a/dir-item.c
+++ b/dir-item.c
@@ -135,7 +135,14 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle 
*trans, struct btrfs_root
name, name_len);
if (IS_ERR(dir_item)) {
ret = PTR_ERR(dir_item);
-   goto out;
+
+   /* Continue to insert item if existed */
+   if (ret == -EEXIST) {
+   ret = 0;
+   goto next;
+   } else {
+   goto out;
+   }
}
 
leaf = path->nodes[0];
@@ -149,6 +156,7 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, 
struct btrfs_root
write_extent_buffer(leaf, name, name_ptr, name_len);
btrfs_mark_buffer_dirty(leaf);
 
+next:
/* FIXME, use some real flag for selecting the extra index */
if (root == root->fs_info->tree_root) {
ret = 0;
@@ -162,8 +170,11 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle 
*trans, struct btrfs_root
name, name_len);
if (IS_ERR(dir_item)) {
ret2 = PTR_ERR(dir_item);
+   if (ret2 == -EEXIST)
+   ret = 0;
goto out;
}
+
leaf = path->nodes[0];
btrfs_cpu_key_to_disk(&disk_key, location);
btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] reflink: test adjacency of reflinked blocks

2017-02-28 Thread Eryu Guan
On Tue, Feb 28, 2017 at 08:31:50AM -0800, Darrick J. Wong wrote:
> On Tue, Feb 28, 2017 at 04:15:02PM +0800, Eryu Guan wrote:
> > On Fri, Feb 24, 2017 at 05:12:57PM -0800, Darrick J. Wong wrote:
> > > From: Darrick J. Wong 
> > > 
> > > If we reflink a file with N blocks to another file one block at a time,
> > > does the destination file end up with the same number of extents as the
> > > source file?  In other words, does the filesystem succeed at combining
> > > adjacent mappings into a maximal extents?
> > 
> > I'm not sure if this is a standard behavior and applies to btrfs too?
> > But btrfs is failing this test now:
> > 
> > +f1 (1) != f2 (32)
> > +s1 (1) != s2 (32)
> > 
> > Fix test or btrfs? I'm taking it if btrfs is the one to be fixed :)
> 
> btrfs has that weird behavior where it doesn't merge the adjacent
> extents at all (at least not according to FIEMAP) until you remount the
> filesystem.  After the remount it's fine, but... WTF? :)
> 
> So yes, the test is working as designed.  btrfs needs fixing, or I guess
> worst case we can _notrun it on btrfs.

Thanks for the explanation! I'll take it as it is at this moment then.

> 
> Snark aside, it was intended originally to make sure that XFS is
> properly merging the extent records together; then it occurred to me to
> rewrite it with fiemap and make it one of the generic reflink tests so
> that ocfs2 can get tested too.

Ah, that reminds me that I need to do testing with ocfs2 too, and test
passed :)

Thanks,
Eryu
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/20] btrfs-progs: cmds-check.c: introduce print_inode_ref

2017-02-28 Thread Su Yue
Introduce 'print_inode_ref_err' to print error messages while checking inode refs.

Add args 'name_ret' and 'namelen_ret' to 'check_inode_ref' because
they are essential while doing nlinks repair.

Signed-off-by: Su Yue 
---
 cmds-check.c | 93 +++-
 1 file changed, 74 insertions(+), 19 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index c45dfae4..24a39e54 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -4334,34 +4334,76 @@ out:
 }
 
 /*
+ * Print inode ref error message
+ */
+static void print_inode_ref_err(struct btrfs_root *root, struct btrfs_key *key,
+   u64 index, const char *namebuf, int name_len,
+   u8 filetype, int err)
+{
+   if (!err)
+   return;
+
+   /*root dir error */
+   if (key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
+   error("root %llu root dir shouldn't have INODE REF[%llu %llu] 
name %s",
+ root->objectid, key->objectid, key->offset, namebuf);
+   return;
+   }
+
+   /* normal error */
+   if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
+   error("root %llu DIR ITEM[%llu %llu] %s name %s filetype %u",
+  root->objectid, key->offset,
+  btrfs_name_hash(namebuf, name_len),
+  err & DIR_ITEM_MISMATCH ? "mismath" : "missing",
+  namebuf, filetype);
+   if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
+   error("root %llu DIR INDEX[%llu %llu] %s name %s filetype %u",
+  root->objectid, key->offset,
+  index,
+  err & DIR_ITEM_MISMATCH ? "mismath" : "missing",
+  namebuf, filetype);
+}
+
+/*
  * Traverse the given INODE_REF and call find_dir_item() to find related
- * DIR_ITEM/DIR_INDEX.
+ * DIR_ITEM/DIR_INDEX.If repair is enable, research @ref_key and
+ * @path may change.
  *
  * @root:  the root of the fs/file tree
  * @ref_key:   the key of the INODE_REF
+ * @paththe path provides node and slot
  * @refs:  the count of INODE_REF
  * @mode:  the st_mode of INODE_ITEM
+ * @name_ret:   returns with the first ref's name
+ * @name_len_ret:len of the name_ret
  *
+ * Return <0 on error.
  * Return 0 if no error occurred.
  */
 static int check_inode_ref(struct btrfs_root *root, struct btrfs_key *ref_key,
-  struct extent_buffer *node, int slot, u64 *refs,
-  int mode)
+  struct btrfs_path *path, char *name_ret,
+  u32 *namelen_ret, u64 *refs, int mode)
 {
struct btrfs_key key;
struct btrfs_key location;
struct btrfs_inode_ref *ref;
+   struct extent_buffer *node;
char namebuf[BTRFS_NAME_LEN] = {0};
+   int name_len;
u32 total;
u32 cur = 0;
-   u32 len;
-   u32 name_len;
+   long len;
u64 index;
-   int ret, err = 0;
+   int err = 0;
+   int tmp_err;
+   int slot;
 
location.objectid = ref_key->objectid;
location.type = BTRFS_INODE_ITEM_KEY;
location.offset = 0;
+   node = path->nodes[0];
+   slot = path->slots[0];
 
ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
total = btrfs_item_size_nr(node, slot);
@@ -4370,6 +4412,7 @@ next:
/* Update inode ref count */
(*refs)++;
 
+   tmp_err = 0;
index = btrfs_inode_ref_index(node, ref);
name_len = btrfs_inode_ref_name_len(node, ref);
if (name_len <= BTRFS_NAME_LEN) {
@@ -4382,30 +4425,40 @@ next:
 
read_extent_buffer(node, namebuf, (unsigned long)(ref + 1), len);
 
-   /* Check root dir ref name */
-   if (index == 0 && strncmp(namebuf, "..", name_len)) {
-   error("root %llu INODE_REF[%llu %llu] ROOT_DIR name shouldn't 
be %s",
- root->objectid, ref_key->objectid, ref_key->offset,
- namebuf);
-   err |= ROOT_DIR_ERROR;
+   /* copy the firt name found to name_ret */
+   if (*refs == 1 && name_ret) {
+   memcpy(name_ret, namebuf, len);
+   *namelen_ret = len;
+   }
+   /* Check root dir ref */
+   if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
+   if (index != 0 || len != strlen("..") ||
+   strncmp("..", namebuf, len) ||
+   ref_key->offset != BTRFS_FIRST_FREE_OBJECTID) {
+   /* set fake err bit so repair will delete the ref */
+   err |= DIR_INDEX_MISSING;
+   err |= DIR_ITEM_MISSING;
+   }
+   goto end;
}
 
/* Find related DIR_INDEX */
key.objectid = ref_key->offset;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = index;
-   ret = find_dir_item(root, &key, &locatio

[PATCH 04/20] btrfs-progs: cmds-check.c: change find_inode_ref's arg

2017-02-28 Thread Su Yue
For further lowmem repairs, change the type of the index argument from
u64 to u64 *, so that the caller can get the index of the ref.

Signed-off-by: Su Yue 
---
 cmds-check.c | 24 +++-
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index fb239968..246f4735 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -4422,22 +4422,23 @@ next:
 }
 
 /*
- * Find INODE_REF/INODE_EXTREF for the given key and check it with the 
specified
- * DIR_ITEM/DIR_INDEX match.
+ * Find INODE_REF/INODE_EXTREF for the given key and check it with the
+ * specified DIR_ITEM/DIR_INDEX match.Returned with right @index.
  *
  * @root:  the root of the fs/file tree
  * @key:   the key of the INODE_REF/INODE_EXTREF
  * @name:  the name in the INODE_REF/INODE_EXTREF
  * @namelen:   the length of name in the INODE_REF/INODE_EXTREF
- * @index: the index in the INODE_REF/INODE_EXTREF, for DIR_ITEM set index
- * to (u64)-1
+ * @index_ret: the index in the INODE_REF/INODE_EXTREF,
+ *  value (64)-1 means do not check index and return
+ *  with matched index.
  * @ext_ref:   the EXTENDED_IREF feature
  *
  * Return 0 if no error occurred.
  * Return >0 for error bitmap
  */
 static int find_inode_ref(struct btrfs_root *root, struct btrfs_key *key,
- char *name, int namelen, u64 index,
+ const char *name, int namelen, u64 *index_ret,
  unsigned int ext_ref)
 {
struct btrfs_path path;
@@ -4474,7 +4475,8 @@ static int find_inode_ref(struct btrfs_root *root, struct 
btrfs_key *key,
 
ref_namelen = btrfs_inode_ref_name_len(node, ref);
ref_index = btrfs_inode_ref_index(node, ref);
-   if (index != (u64)-1 && index != ref_index)
+   if (index_ret && *index_ret != (u64)-1 &&
+   *index_ret != ref_index)
goto next_ref;
 
if (ref_namelen <= BTRFS_NAME_LEN) {
@@ -4492,7 +4494,8 @@ static int find_inode_ref(struct btrfs_root *root, struct 
btrfs_key *key,
 
if (len != namelen || strncmp(ref_namebuf, name, len))
goto next_ref;
-
+   if (index_ret)
+   *index_ret = ref_index;
ret = 0;
goto out;
 next_ref:
@@ -4533,7 +4536,8 @@ extref:
ref_namelen = btrfs_inode_extref_name_len(node, extref);
ref_index = btrfs_inode_extref_index(node, extref);
parent = btrfs_inode_extref_parent(node, extref);
-   if (index != (u64)-1 && index != ref_index)
+   if (index_ret && *index_ret != (u64)-1 &&
+   *index_ret != ref_index)
goto next_extref;
 
if (parent != dir_id)
@@ -4555,6 +4559,8 @@ extref:
if (len != namelen || strncmp(ref_namebuf, name, len))
goto next_extref;
 
+   if (index_ret)
+   *index_ret = ref_index;
ret = 0;
goto out;
 
@@ -4668,7 +4674,7 @@ static int check_dir_item(struct btrfs_root *root, struct 
btrfs_key *key,
location.type = BTRFS_INODE_REF_KEY;
location.offset = key->objectid;
ret = find_inode_ref(root, &location, namebuf, len,
-  index, ext_ref);
+  &index, ext_ref);
err |= ret;
if (ret & INODE_REF_MISSING)
error("root %llu %s[%llu %llu] relative INODE_REF 
missing namelen %u filename %s filetype %d",
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 13/20] btrfs-progs: cmds-check.c: repair_inode_item_missing

2017-02-28 Thread Su Yue
Introduce 'repair_inode_item_missing' to fix INODE_ITEM_MISSING errors.
This patch is for further repair.

Signed-off-by: Su Yue 
---
 cmds-check.c | 43 +++
 1 file changed, 43 insertions(+)

diff --git a/cmds-check.c b/cmds-check.c
index 256bfbc9..6dafbd7d 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -5463,6 +5463,49 @@ out:
 }
 
 /*
+ * Insert the missing inode item.
+ *
+ * Returns 0 means success.
+ * Returns <0 means error.
+ */
+static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
+u8 filetype, int err)
+{
+   struct btrfs_key key;
+   struct btrfs_trans_handle *trans;
+   struct btrfs_path path;
+   int ret;
+
+   if (!err)
+   return 0;
+
+   key.objectid = ino;
+   key.type = BTRFS_INODE_ITEM_KEY;
+   key.offset = 0;
+
+   btrfs_init_path(&path);
+   trans = btrfs_start_transaction(root, 1);
+   if (IS_ERR(trans)) {
+   ret = -EIO;
+   goto out;
+   }
+
+   ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
+   if (ret < 0 || !ret)
+   goto out;
+   /* insert inode item */
+   create_inode_item_lowmem(trans, root, ino, filetype);
+   ret = 0;
+   btrfs_commit_transaction(trans, root);
+out:
+   if (ret)
+   error("Failed to repair root %llu INODE ITEM[%llu] missing",
+ root->objectid, ino);
+   btrfs_release_path(&path);
+   return ret;
+}
+
+/*
  * check first root dir's inode_item, inde_ref
  *
  * returns 0 means no error
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/20] btrfs-progs: cmds-check.c: print_dir_item_err

2017-02-28 Thread Su Yue
Introduce 'print_dir_item_err' to print error messages while
checking dir_item/dir_index.
'check_dir_item' now checks the related dir item and calls
'print_dir_item_err' to print the error message.

Signed-off-by: Su Yue 
---
 cmds-check.c | 127 ---
 1 file changed, 87 insertions(+), 40 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 24a39e54..44abb282 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -4707,25 +4707,61 @@ out:
return ret;
 }
 
+/*
+ * Print one error message for every DIR_ITEM, DIR_INDEX, INODE_ITEM and
+ * INODE_REF problem recorded in @err.
+ *
+ * @root:	the root of the fs/file tree
+ * @key:	key whose objectid/offset identify the dir item in the messages
+ * @ino:	inode number used in the INODE_ITEM/INODE REF messages
+ * @index:	dir index used in the DIR INDEX/INODE_ITEM messages
+ * @namebuf:	entry name, printed with %s
+ * @name_len:	length of the name (not used by the messages)
+ * @filetype:	file type recorded in the dir item
+ * @err:	bitmask of error flags to report
+ */
+static void print_dir_item_err(struct btrfs_root *root, struct btrfs_key *key,
+			       u64 ino, u64 index, const char *namebuf,
+			       int name_len, u8 filetype, int err)
+{
+	if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING)) {
+		error("root %llu DIR ITEM[%llu %llu] name %s filetype %d %s",
+		      root->objectid, key->objectid, key->offset, namebuf,
+		      filetype,
+		      err & DIR_ITEM_MISMATCH ? "mismatch" : "missing");
+	}
+
+	if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING)) {
+		/* pick the wording from the DIR_INDEX flag, not the DIR_ITEM one */
+		error("root %llu DIR INDEX[%llu %llu] name %s filetype %d %s",
+		      root->objectid, key->objectid, index, namebuf,
+		      filetype,
+		      err & DIR_INDEX_MISMATCH ? "mismatch" : "missing");
+	}
+
+	if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH)) {
+		error("root %llu INODE_ITEM[%llu] index %llu name %s filetype %d %s",
+		      root->objectid, ino, index, namebuf,
+		      filetype,
+		      err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
+	}
+
+	if (err & INODE_REF_MISSING)
+		error("root %llu INODE REF[%llu, %llu] name %s filetype %u missing",
+		      root->objectid, ino, key->objectid, namebuf, filetype);
+}
+
 /*
  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
- * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.
+ * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.If repair
+ * is enable, do repair and research by @path->nodes[0].
  *
  * @root:  the root of the fs/file tree
  * @key:   the key of the INODE_REF/INODE_EXTREF
+ * @path:   the path of key
  * @size:  the st_size of the INODE_ITEM
  * @ext_ref:   the EXTENDED_IREF feature
  *
  * Return 0 if no error occurred.
+ * Return >0 if on error
  */
 static int check_dir_item(struct btrfs_root *root, struct btrfs_key *key,
- struct extent_buffer *node, int slot, u64 *size,
+ struct btrfs_path *path, u64 *size,
  unsigned int ext_ref)
 {
struct btrfs_dir_item *di;
struct btrfs_inode_item *ii;
-   struct btrfs_path path;
+   struct btrfs_key key2;
struct btrfs_key location;
+   struct extent_buffer *node;
+   int slot;
char namebuf[BTRFS_NAME_LEN] = {0};
u32 total;
u32 cur = 0;
@@ -4737,6 +4773,7 @@ static int check_dir_item(struct btrfs_root *root, struct 
btrfs_key *key,
u64 index;
int ret;
int err = 0;
+   int tmp_err;
 
/*
 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
@@ -4744,11 +4781,14 @@ static int check_dir_item(struct btrfs_root *root, 
struct btrfs_key *key,
 */
index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
 
+   node = path->nodes[0];
+   slot = path->slots[0];
di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
total = btrfs_item_size_nr(node, slot);
 
while (cur < total) {
data_len = btrfs_dir_data_len(node, di);
+   tmp_err = 0;
if (data_len)
error("root %llu %s[%llu %llu] data_len shouldn't be 
%u",
  root->objectid, key->type == BTRFS_DIR_ITEM_KEY ?
@@ -4768,58 +4808,61 @@ static int check_dir_item(struct btrfs_root *root, 
struct btrfs_key *key,
}
(*size) += name_len;
 
-   read_extent_buffer(node, namebuf, (unsigned long)(di + 1), len);
+   read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
+  len);
filetype = btrfs_dir_type(node, di);
 
-   btrfs_init_path(&path);
btrfs_dir_item_key_to_cpu(node, di, &location);
-
/* Ignore related ROOT_ITEM check */
if (location.type == BTRFS_ROOT_ITEM_KEY)
goto next;
 
+   btrfs_release_path(path);
/* Check relative INODE_ITEM(existence/filetype) */
-   ret = btrfs_search_slot(NULL, root, &location, &path, 0, 0);
+   ret = btrfs_search_slot(NULL, root, &location, path, 0, 0);
if (ret) {
-   err |= INODE_ITEM_MISSING;
-

[PATCH 00/20] Enable lowmem repair for fs/subvolume tree

2017-02-28 Thread Su Yue
It can be fetched from my github:
https://github.com/Damenly/btrfs-progs.git lowmem_repair

This patchset can repair errors found in fs tree in lowmem mode.

This patchset request includes:
1) Repair inode nbytes error.
2) Repair dir isize error.
3) Repair orphan inode item.
4) Repair dir item/index missing/mismatch.
5) Repair inode ref missing/mismatch.
6) Repair inode item missing.
7) Repair inode nlink error.
8) Punch file extent hole.
9) Let test-fsck test cases which can be repaired in lowmem mode.

All cases have been tested except fsck-test/006 since it can't be
repaired in original mode.

Qu Wenruo (1):
  btrfs-progs: fsck-check: Allow fsck check test to repair in lowmem
mode for certain test cases

Su Yue (19):
  btrfs-progs: cmds-check.c: supports inode nbytes fix in lowmem
  btrfs-progs: cmds-check.c: supports dir isize fix in lowmem
  btrfs-progs: cmds-check.c: inode orphan item repair
  btrfs-progs: cmds-check.c: change find_inode_ref's arg
  btrfs-progs: cmds-check.c: modify check_fs_first_inode
  btrfs-progs: cmds-check.c: change find_dir_index/item
  btrfs-progs: cmds-check.c: introduce print_inode_ref
  btrfs-progs: cmds-check.c: print_dir_item_err
  btrfs-progs: cmds-check.c: introduce count_dir_isize
  btrfs-progs: dir-item.c: modify btrfs_insert_dir_item
  btrfs-progs: inode.c: alter btrfs_add_link
  btrfs-progs: cmds-check.c: introduce __create_inode_item
  btrfs-progs: cmds-check.c: repair_inode_item_missing
  btrfs-progs: cmds-check.c: repair_fs_first_inode
  btrfs-progs: cmds-check.c: introduce repair_ternary_lowmem
  btrfs-progs: cmds-check.c: Introduce repair_dir_item
  btrfs-progs: cmds-check.c: repair inode ref
  btrfs-progs: cmds-check.c: repair nlinks lowmem
  btrfs-progs: cmds-check.c: add punch_extent_hole

 cmds-check.c   | 1677 +++-
 convert/main.c |2 +-
 ctree.h|2 +-
 dir-item.c |   13 +-
 inode.c|   46 +-
 tests/common.local |   14 +-
 .../fsck-tests/004-no-dir-index/.lowmem_repairable |0
 .../009-no-dir-item-or-index/.lowmem_repairable|0
 .../010-no-rootdir-inode-item/.lowmem_repairable   |0
 .../011-no-inode-item/.lowmem_repairable   |0
 .../016-wrong-inode-nbytes/.lowmem_repairable  |0
 .../017-missing-all-file-extent/.lowmem_repairable |0
 12 files changed, 1363 insertions(+), 391 deletions(-)
 create mode 100644 tests/fsck-tests/004-no-dir-index/.lowmem_repairable
 create mode 100644 tests/fsck-tests/009-no-dir-item-or-index/.lowmem_repairable
 create mode 100644 
tests/fsck-tests/010-no-rootdir-inode-item/.lowmem_repairable
 create mode 100644 tests/fsck-tests/011-no-inode-item/.lowmem_repairable
 create mode 100644 tests/fsck-tests/016-wrong-inode-nbytes/.lowmem_repairable
 create mode 100644 
tests/fsck-tests/017-missing-all-file-extent/.lowmem_repairable

-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/20] btrfs-progs: cmds-check.c: introduce count_dir_isize

2017-02-28 Thread Su Yue
Introduce 'count_dir_isize' to get dir isize.
This function is called only under lowmem repair mode.

Signed-off-by: Su Yue 
---
 cmds-check.c | 95 ++--
 1 file changed, 92 insertions(+), 3 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 44abb282..685f4f5d 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -4738,6 +4738,93 @@ static void print_dir_item_err(struct btrfs_root *root, 
struct btrfs_key *key,
 
 }
 
+/*
+ * Add up the name lengths stored in all items of key type @type that
+ * belong to directory @ino, and store the sum in @size_ret.
+ *
+ * The search starts at offset (u64)-1 and walks backwards with
+ * btrfs_previous_item() until no item of @ino/@type is left.
+ *
+ * Returns 0 on success, -EIO when the initial search fails.
+ */
+static int __count_dir_isize(struct btrfs_root *root, u64 ino,
+			     int type, u64 *size_ret)
+{
+	struct btrfs_path path;
+	struct btrfs_key key;
+	struct btrfs_dir_item *di;
+	u32 len;
+	int total = 0;
+	int cur = 0;
+	int on_item;
+	int ret;
+
+	ASSERT(size_ret);
+	*size_ret = 0;
+
+	key.objectid = ino;
+	key.type = type;
+	key.offset = (u64)-1;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+	if (ret < 0) {
+		ret = -EIO;
+		goto out;
+	}
+
+	/* special case: an exact match already sits on an item to count */
+	on_item = (ret == 0);
+
+	for (;;) {
+		/* otherwise step back; any non-zero result ends the walk */
+		if (!on_item && btrfs_previous_item(root, &path, ino, type))
+			break;
+		on_item = 0;
+
+		di = btrfs_item_ptr(path.nodes[0], path.slots[0],
+				    struct btrfs_dir_item);
+		total = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
+
+		/* one item may pack several dir entries back to back */
+		for (cur = 0; cur < total; cur += len) {
+			len = btrfs_dir_name_len(path.nodes[0], di);
+			if (len > BTRFS_NAME_LEN)
+				len = BTRFS_NAME_LEN;
+			*size_ret += len;
+
+			len += btrfs_dir_data_len(path.nodes[0], di);
+			len += sizeof(*di);
+			di = (struct btrfs_dir_item *)((char *)di + len);
+		}
+	}
+	ret = 0;
+
+out:
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
+ * Compute the isize of directory @ino as the name-length sum of its
+ * DIR_ITEMs plus that of its DIR_INDEXes, stored in @size on success.
+ *
+ * Returns 0 on success, otherwise the error of __count_dir_isize();
+ * @size is left untouched in that case.
+ */
+static int count_dir_isize(struct btrfs_root *root, u64 ino, u64 *size)
+{
+	u64 item_bytes;
+	u64 index_bytes;
+	int ret;
+
+	ASSERT(size);
+
+	ret = __count_dir_isize(root, ino, BTRFS_DIR_ITEM_KEY, &item_bytes);
+	if (!ret)
+		ret = __count_dir_isize(root, ino, BTRFS_DIR_INDEX_KEY,
+					&index_bytes);
+	if (ret) {
+		error("Failed to count root %llu INODE[%llu] root size",
+		      root->objectid, ino);
+		return ret;
+	}
+
+	*size = item_bytes + index_bytes;
+	return ret;
+}
+
 /*
  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.If repair
@@ -4807,7 +4894,6 @@ static int check_dir_item(struct btrfs_root *root, struct 
btrfs_key *key,
key->objectid, key->offset);
}
(*size) += name_len;
-
read_extent_buffer(node, namebuf, (unsigned long)(di + 1),
   len);
filetype = btrfs_dir_type(node, di);
@@ -5256,8 +5342,7 @@ static int check_inode_item(struct btrfs_root *root, 
struct btrfs_path *path,
imode_to_type(mode), key.objectid,
key.offset);
}
-   ret = check_dir_item(root, &key, path, &size,
-ext_ref);
+   ret = check_dir_item(root, &key, path, &size, ext_ref);
err |= ret;
break;
case BTRFS_EXTENT_DATA_KEY:
@@ -5280,6 +5365,10 @@ static int check_inode_item(struct btrfs_root *root, 
struct btrfs_path *path,
}
 
 out:
+   /* Only get isize again since it costs time much */
+   if (repair)
+   count_dir_isize(root, inode_id, &size);
+
/* verify INODE_ITEM nlink/isize/nbytes */
if (dir) {
if (nlink != 1) {
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 14/20] btrfs-progs: cmds-check.c: repair_fs_first_inode

2017-02-28 Thread Su Yue
Introduce 'repair_fs_first_inode' to repair first inode errors.

Signed-off-by: Su Yue 
---
 cmds-check.c | 69 
 1 file changed, 69 insertions(+)

diff --git a/cmds-check.c b/cmds-check.c
index 6dafbd7d..85e87884 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -5506,6 +5506,71 @@ out:
 }
 
 /*
+ * Normal INODE_ITEM_MISSING and INODE_REF_MISSING are handled in backref
+ * dir. Root dir should be handled specially because root dir is the root
+ * of fs.
+ *
+ * The INODE_REF_MISSING and INODE_ITEM_MISSING bits of @err_ret are cleared
+ * once the matching repair succeeded.
+ *
+ * returns 0 means success
+ * returns <0 means failure
+ */
+static int repair_fs_first_inode(struct btrfs_root *root, int *err_ret)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_key key;
+	struct btrfs_path path;
+	int filetype = BTRFS_FT_DIR;
+	int ret = 0;
+
+	ASSERT(err_ret);
+	btrfs_init_path(&path);
+
+	if (*err_ret & INODE_REF_MISSING) {
+		key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+		key.type = BTRFS_INODE_REF_KEY;
+		key.offset = BTRFS_FIRST_FREE_OBJECTID;
+
+		trans = btrfs_start_transaction(root, 1);
+		if (IS_ERR(trans))
+			return PTR_ERR(trans);
+
+		btrfs_release_path(&path);
+		ret = btrfs_search_slot(trans, root, &key, &path, 1, 1);
+		if (ret < 0 || !ret) {
+			/* do not leave the transaction open on the way out */
+			btrfs_commit_transaction(trans, root);
+			goto out;
+		}
+
+		ret = btrfs_insert_inode_ref(trans, root, "..", strlen(".."),
+					     BTRFS_FIRST_FREE_OBJECTID,
+					     BTRFS_FIRST_FREE_OBJECTID, 0);
+		if (ret > 0)
+			ret = -ret;
+		if (ret) {
+			error("Failed to insert first inode ref");
+			/* same here: end the transaction before bailing out */
+			btrfs_commit_transaction(trans, root);
+			goto out;
+		}
+		printf("Add INODE_REF[%llu %llu] name %s\n",
+		       BTRFS_FIRST_FREE_OBJECTID, BTRFS_FIRST_FREE_OBJECTID,
+		       "..");
+		*err_ret &= ~INODE_REF_MISSING;
+		btrfs_commit_transaction(trans, root);
+	}
+
+	if (*err_ret & INODE_ITEM_MISSING) {
+		ret = repair_inode_item_missing(root,
+						BTRFS_FIRST_FREE_OBJECTID,
+						filetype, *err_ret);
+		if (ret)
+			goto out;
+		*err_ret &= ~INODE_ITEM_MISSING;
+	}
+
+out:
+	if (ret)
+		error("Failed to repair first inode");
+	btrfs_release_path(&path);
+	return ret;
+}
+
+/*
  * check first root dir's inode_item, inde_ref
  *
  * returns 0 means no error
@@ -5560,6 +5625,10 @@ static int check_fs_first_inode(struct btrfs_root *root, 
unsigned int ext_ref)
 
 out:
btrfs_release_path(&path);
+
+   if (err && repair)
+   repair_fs_first_inode(root, &err);
+
if (err & (INODE_ITEM_MISSING | INODE_ITEM_MISMATCH))
error("root dir INODE_ITEM is %s",
  err & INODE_ITEM_MISMATCH ? "mismatch" : "missing");
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/20] btrfs-progs: cmds-check.c: supports dir isize fix in lowmem

2017-02-28 Thread Su Yue
Add a function 'repair_dir_isize_lowmem' to support dir isize
repair in lowmem mode.

Signed-off-by: Su Yue 
---
 cmds-check.c | 69 +---
 1 file changed, 66 insertions(+), 3 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 40f9d21e..f13ce317 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -4880,6 +4880,62 @@ out:
 }
 
 /*
+ * Set dir isize to @isize
+ *
+ * @path is released for the update and afterwards searched again
+ * (read-only) for the key it pointed at on entry, so anything the caller
+ * derived from @path must be reloaded.
+ *
+ * Returns <0  means on error (from the update or from the re-search)
+ * Returns  0  means successful repair
+ */
+static int repair_dir_isize_lowmem(struct btrfs_root *root,
+				   struct btrfs_path *path,
+				   u64 ino, u64 isize)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_inode_item *ii;
+	struct btrfs_key key;
+	struct btrfs_key research_key;
+	int ret;
+	int ret2;
+
+	key.objectid = ino;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+
+	/* remember where @path pointed so it can be restored below */
+	btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
+	btrfs_release_path(path);
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		goto out;
+	}
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret > 0)
+		ret = -ENOENT;
+	if (ret < 0)
+		goto fail;
+
+	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_inode_item);
+	btrfs_set_inode_size(path->nodes[0], ii, isize);
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+
+	printf("reset isize for inode %llu root %llu\n", ino,
+	       root->root_key.objectid);
+
+fail:
+	/* the transaction was started, end it on failure paths as well */
+	btrfs_commit_transaction(trans, root);
+out:
+	if (ret < 0)
+		error("failed to reset isize for inode %llu root %llu due to %s",
+		      ino, root->root_key.objectid, strerror(-ret));
+	btrfs_release_path(path);
+	ret2 = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
+	return ret2 < 0 ? ret2 : ret;
+}
+
+/*
  * Check INODE_ITEM and related ITEMs (the same inode number)
  * 1. check link count
  * 2. check inode ref/extref
@@ -5015,9 +5071,16 @@ out:
}
 
if (isize != size) {
-   err |= ISIZE_ERROR;
-   error("root %llu DIR INODE [%llu] size(%llu) not equal 
to %llu",
- root->objectid, inode_id, isize, size);
+   if (repair)
+   ret = repair_dir_isize_lowmem(root, path,
+ inode_id,
+ size);
+
+   if (!repair || ret) {
+   err |= ISIZE_ERROR;
+   error("root %llu DIR INODE [%llu] size(%llu) 
not equal to %llu",
+ root->objectid, inode_id, isize, size);
+   }
}
} else {
if (nlink != refs) {
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 19/20] btrfs-progs: cmds-check.c: add punch_extent_hole

2017-02-28 Thread Su Yue
Introduce 'punch_extent_hole' to punch holes while
repair file extent.

Signed-off-by: Su Yue 
---
 cmds-check.c | 51 +--
 1 file changed, 45 insertions(+), 6 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index ae80d5f0..d228fc62 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -4483,6 +4483,7 @@ next:
memcpy(name_ret, namebuf, len);
*namelen_ret = len;
}
+
/* Check root dir ref */
if (ref_key->objectid == BTRFS_FIRST_FREE_OBJECTID) {
if (index != 0 || len != strlen("..") ||
@@ -5105,6 +5106,33 @@ next:
 }
 
 /*
+ * Wrapper function of btrfs_punch_hole.
+ *
+ * Starts a transaction, punches the hole [@start, @start + @len) in inode
+ * @ino and ends the transaction again, whether or not punching succeeded.
+ *
+ * Returns 0 means success.
+ * Returns the error of btrfs_start_transaction()/btrfs_punch_hole() otherwise.
+ */
+static int punch_extent_hole(struct btrfs_root *root, u64 ino, u64 start,
+			     u64 len)
+{
+	struct btrfs_trans_handle *trans;
+	int ret = 0;
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	ret = btrfs_punch_hole(trans, root, ino, start, len);
+	/* end the transaction on both paths so a failure does not leak it */
+	btrfs_commit_transaction(trans, root);
+	if (ret)
+		error("Failed to add hole [%llu, %llu] in inode [%llu]",
+		      start, len, ino);
+	else
+		printf("Added hole [%llu, %llu] in inode [%llu]\n", start, len,
+		       ino);
+	return ret;
+}
+
+/*
  * Check file extent datasum/hole, update the size of the file extents,
  * check and update the last offset of the file extent.
  *
@@ -5222,9 +5250,14 @@ static int check_file_extent(struct btrfs_root *root, 
struct btrfs_key *fkey,
error("root %llu EXTENT_DATA[%llu %llu] shouldn't be hole",
  root->objectid, fkey->objectid, fkey->offset);
} else if (!no_holes && *end != fkey->offset) {
-   err |= FILE_EXTENT_ERROR;
-   error("root %llu EXTENT_DATA[%llu %llu] interrupt",
- root->objectid, fkey->objectid, fkey->offset);
+		if (repair)
+			ret = punch_extent_hole(root, fkey->objectid,
+						*end, fkey->offset - *end);
+		/* report the gap unless repair mode managed to fill it */
+		if (!repair || ret) {
+			err |= FILE_EXTENT_ERROR;
+			error("root %llu EXTENT_DATA[%llu %llu] interrupt",
+			      root->objectid, fkey->objectid, fkey->offset);
+		}
}
 
*end += extent_num_bytes;
@@ -5724,9 +5757,15 @@ out:
}
 
if (!nbytes && !no_holes && extent_end < isize) {
-   err |= NBYTES_ERROR;
-   error("root %llu INODE[%llu] size (%llu) should have a 
file extent hole",
- root->objectid, inode_id, isize);
+   if (repair)
+   ret = punch_extent_hole(root, inode_id,
+   extent_end,
+   isize-extent_end);
+   if (!repair || ret) {
+   err |= NBYTES_ERROR;
+   error("root %llu INODE[%llu] size (%llu) should 
have a file extent hole",
+ root->objectid, inode_id, isize);
+   }
}
 
if (nbytes != extent_size) {
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/20] btrfs-progs: cmds-check.c: supports inode nbytes fix in lowmem

2017-02-28 Thread Su Yue
Add a function 'repair_inode_nbytes_lowmem' to correct inode item nbytes
error in lowmem mode.

Signed-off-by: Su Yue 
---
 cmds-check.c | 82 +++-
 1 file changed, 76 insertions(+), 6 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 9cc1932c..40f9d21e 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -1907,6 +1907,9 @@ static int process_one_leaf_v2(struct btrfs_root *root, 
struct btrfs_path *path,
 again:
err |= check_inode_item(root, path, ext_ref);
 
+   /* remodified cur since check_inode_item may change path */
+   cur = path->nodes[0];
+
if (err & LAST_ITEM)
goto out;
 
@@ -2256,6 +2259,7 @@ static int walk_down_tree_v2(struct btrfs_root *root, 
struct btrfs_path *path,
}
ret = process_one_leaf_v2(root, path, nrefs,
  level, ext_ref);
+   cur = path->nodes[*level];
break;
} else {
ret = btrfs_check_node(root, NULL, cur);
@@ -4819,10 +4823,69 @@ static int check_file_extent(struct btrfs_root *root, 
struct btrfs_key *fkey,
 }
 
 /*
+ * Set inode item nbytes to @nbytes
+ *
+ * @path is released for the update and afterwards searched again
+ * (read-only) for the key it pointed at on entry, so anything the caller
+ * derived from @path must be reloaded.
+ *
+ * Returns <0  means on error (from the update or from the re-search)
+ * Returns  0  means successful repair
+ */
+static int repair_inode_nbytes_lowmem(struct btrfs_root *root,
+				      struct btrfs_path *path,
+				      u64 ino, u64 nbytes)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_inode_item *ii;
+	struct btrfs_key key;
+	struct btrfs_key research_key;
+	int ret;
+	int ret2;
+
+	key.objectid = ino;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+	/* remember where @path pointed so it can be restored below */
+	btrfs_item_key_to_cpu(path->nodes[0], &research_key, path->slots[0]);
+	btrfs_release_path(path);
+
+	trans = btrfs_start_transaction(root, 1);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		goto out;
+	}
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret > 0)
+		ret = -ENOENT;
+	if (ret < 0)
+		goto fail;
+
+	ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			    struct btrfs_inode_item);
+	btrfs_set_inode_nbytes(path->nodes[0], ii, nbytes);
+	btrfs_mark_buffer_dirty(path->nodes[0]);
+
+	printf("reset nbytes for inode %llu root %llu\n", ino,
+	       root->root_key.objectid);
+
+fail:
+	/* the transaction was started, end it on failure paths as well */
+	btrfs_commit_transaction(trans, root);
+out:
+	if (ret < 0)
+		error("failed to reset nbytes for inode %llu root %llu due to %s",
+		      ino, root->root_key.objectid, strerror(-ret));
+
+	/* research path */
+	btrfs_release_path(path);
+	ret2 = btrfs_search_slot(NULL, root, &research_key, path, 0, 0);
+	return ret2 < 0 ? ret2 : ret;
+}
+
+/*
  * Check INODE_ITEM and related ITEMs (the same inode number)
  * 1. check link count
  * 2. check inode ref/extref
  * 3. check dir item/index
+ * Be Careful, if repair is enable, @path may be changed.
+ * Remember to reassign any context about @path in repair mode.
  *
  * @ext_ref:   the EXTENDED_IREF feature
  *
@@ -4972,9 +5035,17 @@ out:
}
 
if (nbytes != extent_size) {
-   err |= NBYTES_ERROR;
-   error("root %llu INODE[%llu] nbytes(%llu) not equal to 
extent_size(%llu)",
- root->objectid, inode_id, nbytes, extent_size);
+   if (repair) {
+   ret = repair_inode_nbytes_lowmem(root, path,
+inode_id,
+extent_size);
+   }
+   if (!repair || ret) {
+   err |= NBYTES_ERROR;
+   error("root %llu INODE[%llu] nbytes(%llu) not 
equal to extent_size(%llu)",
+ root->objectid, inode_id, nbytes,
+ extent_size);
+   }
}
}
 
@@ -12798,11 +12869,10 @@ int cmd_check(int argc, char **argv)
}
 
/*
-* Not supported yet
+* Support partially
 */
if (repair && check_mode == CHECK_MODE_LOWMEM) {
-   error("low memory mode doesn't support repair yet");
-   exit(1);
+   warning("low memory mode support repair partially");
}
 
radix_tree_init();
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 18/20] btrfs-progs: cmds-check.c: repair nlinks lowmem

2017-02-28 Thread Su Yue
Introduce 'repair_inode_nlinks_lowmem'.
If ref is 0, move the inode to "lost + found".
Set inode item's nlink to ref_count.

Signed-off-by: Su Yue 
---
 cmds-check.c | 233 +--
 1 file changed, 179 insertions(+), 54 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 9ac08dfd..ae80d5f0 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -2912,15 +2912,17 @@ static int get_highest_inode(struct btrfs_trans_handle 
*trans,
return ret;
 }
 
+static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
+  struct btrfs_root *root,
+  struct btrfs_path *path, u64 ino,
+  char *name, u32 name_len, u8 filetype,
+  u64 *ref_count);
 static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
   struct btrfs_root *root,
   struct btrfs_path *path,
   struct inode_record *rec)
 {
-   char *dir_name = "lost+found";
char namebuf[BTRFS_NAME_LEN] = {0};
-   u64 lost_found_ino;
-   u32 mode = 0700;
u8 type = 0;
int namelen = 0;
int name_recovered = 0;
@@ -2957,55 +2959,11 @@ static int repair_inode_nlinks(struct 
btrfs_trans_handle *trans,
}
 
if (rec->found_link == 0) {
-   ret = get_highest_inode(trans, root, path, &lost_found_ino);
-   if (ret < 0)
-   goto out;
-   lost_found_ino++;
-   ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
- BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
- mode);
-   if (ret < 0) {
-   fprintf(stderr, "Failed to create '%s' dir: %s\n",
-   dir_name, strerror(-ret));
-   goto out;
-   }
-   ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
-namebuf, namelen, type, NULL, 1, 0);
-   /*
-* Add ".INO" suffix several times to handle case where
-* "FILENAME.INO" is already taken by another file.
-*/
-   while (ret == -EEXIST) {
-   /*
-* Conflicting file name, add ".INO" as suffix * +1 for 
'.'
-*/
-   if (namelen + count_digits(rec->ino) + 1 >
-   BTRFS_NAME_LEN) {
-   ret = -EFBIG;
-   goto out;
-   }
-   snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
-".%llu", rec->ino);
-   namelen += count_digits(rec->ino) + 1;
-   ret = btrfs_add_link(trans, root, rec->ino,
-lost_found_ino, namebuf,
-namelen, type, NULL, 1, 0);
-   }
-   if (ret < 0) {
-   fprintf(stderr,
-   "Failed to link the inode %llu to %s dir: %s\n",
-   rec->ino, dir_name, strerror(-ret));
+   ret = link_inode_to_lostfound(trans, root, path, rec->ino,
+ namebuf, namelen, type,
+ (u64 *)&rec->found_link);
+   if (ret)
goto out;
-   }
-   /*
-* Just increase the found_link, don't actually add the
-* backref. This will make things easier and this inode
-* record will be freed after the repair is done.
-* So fsck will not report problem about this inode.
-*/
-   rec->found_link++;
-   printf("Moving file '%.*s' to '%s' dir since it has no valid 
backref\n",
-  namelen, namebuf, dir_name);
}
printf("Fixed the nlink of inode %llu\n", rec->ino);
 out:
@@ -5430,6 +5388,160 @@ out:
 }
 
 /*
+ * Link inode to dir 'lost+found'. Increase @ref_count.
+ *
+ * Returns 0 means success.
+ * Returns <0 means failure.
+ */
+static int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
+  struct btrfs_root *root,
+  struct btrfs_path *path,
+  u64 ino, char *namebuf, u32 name_len,
+  u8 filetype, u64 *ref_count)
+{
+   char *dir_name = "lost+found";
+   u64 lost_found_ino;
+   int ret;
+   u32 mode = 0700;
+
+   btrfs_release_path(path);
+   ret = get_highest_inode(trans, root, path, &lost_found_ino);
+   if (ret < 0)
+  

[PATCH 15/20] btrfs-progs: cmds-check.c: introduce repair_ternary_lowmem

2017-02-28 Thread Su Yue
Introduce 'repair_ternary_lowmem' to repair error while checking
dir_item/index, inode_ref by the rule:
1. If two of three is missing or mismatched, delete the existed one.
2. If one of three is missing or mismatched, add the missing one.

Signed-off-by: Su Yue 
---
 cmds-check.c | 57 +
 1 file changed, 57 insertions(+)

diff --git a/cmds-check.c b/cmds-check.c
index 85e87884..9a76107e 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -4388,6 +4388,63 @@ static void print_inode_ref_err(struct btrfs_root *root, 
struct btrfs_key *key,
   namebuf, filetype);
 }
 
+static int repair_inode_item_missing(struct btrfs_root *root, u64 ino,
+u8 filetype, int err);
+/*
+ * The ternary contains dir item, dir index and relative inode ref.
+ * The repair function handles the errors INODE_REF_MISSING,
+ * DIR_INDEX_MISSING, DIR_INDEX_MISMATCH, DIR_ITEM_MISSING and
+ * DIR_ITEM_MISMATCH by the following strategy:
+ * If two of three are missing or mismatched, delete the existing one.
+ * If one of three is missing or mismatched, add the missing one.
+ *
+ * Returns 0 means success (also when @err holds none of the errors above).
+ * Returns <0 means the transaction could not be started or the
+ * unlink/add_link failed.
+ */
+int repair_ternary_lowmem(struct btrfs_root *root, u64 dir_ino, u64 ino,
+			  u64 index, char *name, int name_len, u8 filetype,
+			  int err)
+{
+	struct btrfs_trans_handle *trans;
+	int stage = 0;
+	/* stays 0 when stage is 0, i.e. nothing has to be changed */
+	int ret = 0;
+
+	if (!err)
+		return 0;
+	/*
+	 * stage shall be one of the following valid values:
+	 *	0: Fine, nothing to do.
+	 *	1: One of three is wrong, so add the missing one.
+	 *	2: Two of three are wrong, so delete the existing one.
+	 */
+	if (err & (DIR_INDEX_MISMATCH | DIR_INDEX_MISSING))
+		++stage;
+	if (err & (DIR_ITEM_MISMATCH | DIR_ITEM_MISSING))
+		++stage;
+	if (err & (INODE_REF_MISSING))
+		++stage;
+
+	/* stage must be smaller than 3 */
+	ASSERT(stage < 3);
+
+	trans = btrfs_start_transaction(root, 2);
+	if (IS_ERR(trans))
+		return PTR_ERR(trans);
+
+	if (stage == 2)
+		ret = btrfs_unlink(trans, root, ino, dir_ino, index, name,
+				   name_len, 0);
+	else if (stage == 1)
+		ret = btrfs_add_link(trans, root, ino, dir_ino, name, name_len,
+				     filetype, &index, 1, 1);
+
+	/* end the transaction on failure too, otherwise it is leaked */
+	btrfs_commit_transaction(trans, root);
+	return ret;
+}
+
 /*
  * Traverse the given INODE_REF and call find_dir_item() to find related
  * DIR_ITEM/DIR_INDEX.If repair is enable, research @ref_key and
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 17/20] btrfs-progs: cmds-check.c: repair inode ref

2017-02-28 Thread Su Yue
Call repair_ternary_lowmem while checking inode ref.

Introduce 'repair_dir_item' calls above function to repair dir_item.

Signed-off-by: Su Yue 
---
 cmds-check.c | 29 +++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 1b35a5fd..9ac08dfd 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -4445,6 +4445,8 @@ out:
return ret;
 }
 
+static int research_path(struct btrfs_root *root, struct btrfs_path *path,
+struct btrfs_key *key);
 /*
  * Traverse the given INODE_REF and call find_dir_item() to find related
  * DIR_ITEM/DIR_INDEX.If repair is enable, research @ref_key and
@@ -4475,26 +4477,39 @@ static int check_inode_ref(struct btrfs_root *root, 
struct btrfs_key *ref_key,
u32 cur = 0;
long len;
u64 index;
-   int err = 0;
+   int ret, err = 0;
int tmp_err;
int slot;
+   int need_research = 0;
+
+research:
+	if (need_research) {
+		ret = research_path(root, path, ref_key);
+		/* request consumed; a later successful repair sets it again */
+		need_research = 0;
+		if (ret)
+			return ret > 0 ? 0 : ret;
+	}
 
+   err = 0;
+   cur = 0;
+   *refs = 0;
location.objectid = ref_key->objectid;
location.type = BTRFS_INODE_ITEM_KEY;
location.offset = 0;
node = path->nodes[0];
slot = path->slots[0];
 
+   memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
ref = btrfs_item_ptr(node, slot, struct btrfs_inode_ref);
total = btrfs_item_size_nr(node, slot);
 
 next:
/* Update inode ref count */
(*refs)++;
-
tmp_err = 0;
index = btrfs_inode_ref_index(node, ref);
name_len = btrfs_inode_ref_name_len(node, ref);
+
if (name_len <= BTRFS_NAME_LEN) {
len = name_len;
} else {
@@ -4536,6 +4551,16 @@ next:
tmp_err |= find_dir_item(root, &key, &location, namebuf, len,
imode_to_type(mode));
 end:
+   if (tmp_err && repair) {
+   ret = repair_ternary_lowmem(root, ref_key->offset,
+   ref_key->objectid, index, namebuf,
+   name_len, imode_to_type(mode),
+   tmp_err);
+   if (!ret) {
+   need_research = true;
+   goto research;
+   }
+   }
print_inode_ref_err(root, ref_key, index, namebuf, name_len,
imode_to_type(mode), tmp_err);
err |= tmp_err;
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 16/20] btrfs-progs: cmds-check.c: Introduce repair_dir_item

2017-02-28 Thread Su Yue
Introduce 'repair_dir_item' to repair dir item/index missing/mismatch
and relative inode item missing while checking.

Signed-off-by: Su Yue 
---
 cmds-check.c | 78 
 1 file changed, 78 insertions(+)

diff --git a/cmds-check.c b/cmds-check.c
index 9a76107e..1b35a5fd 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -4906,6 +4906,60 @@ out:
 }
 
 /*
+ * Call repair_inode_item_missing and repair_ternary_lowmem to repair
+ *
+ * @err_ret: error bits found while checking the dir item; the bits of
+ *	     every repair that succeeded are cleared on return
+ *
+ * Returns 0 means success
+ * Returns the first failure otherwise, even if a later repair succeeded
+ */
+static int repair_dir_item(struct btrfs_root *root, u64 dirid, u64 ino,
+			   u64 index, u8 filetype, char *namebuf, u32 name_len,
+			   int *err_ret)
+{
+	int ret = 0;
+	int ret2;
+
+	if (*err_ret & INODE_ITEM_MISSING) {
+		ret = repair_inode_item_missing(root, ino, filetype, *err_ret);
+		if (!ret)
+			*err_ret &= ~(INODE_ITEM_MISMATCH |
+				      INODE_ITEM_MISSING);
+	}
+
+	if (*err_ret & ~(INODE_ITEM_MISMATCH | INODE_ITEM_MISSING)) {
+		ret2 = repair_ternary_lowmem(root, dirid, ino, index, namebuf,
+					     name_len, filetype, *err_ret);
+		if (!ret2) {
+			*err_ret &= ~(DIR_INDEX_MISMATCH | DIR_INDEX_MISSING);
+			*err_ret &= ~(DIR_ITEM_MISMATCH | DIR_ITEM_MISSING);
+			*err_ret &= ~(INODE_REF_MISSING);
+		} else if (!ret) {
+			/* keep an earlier inode item failure, if there was one */
+			ret = ret2;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Release @path and look @key up in @root again (read-only). If @key is
+ * not found, hand over to btrfs_previous_item() for the same objectid and
+ * key type and return its result.
+ *
+ * returns 0 means @path is positioned again
+ * returns <0 means failure
+ * returns >0 is passed through from btrfs_previous_item(); presumably no
+ *	      previous item exists - confirm against btrfs_previous_item()
+ */
+static int research_path(struct btrfs_root *root, struct btrfs_path *path,
+			 struct btrfs_key *key)
+{
+	int found;
+
+	btrfs_release_path(path);
+	found = btrfs_search_slot(NULL, root, key, path, 0, 0);
+	/* exact hit or search error: nothing more to do */
+	if (found <= 0)
+		return found;
+
+	return btrfs_previous_item(root, path, key->objectid, key->type);
+}
+/*
  * Traverse the given DIR_ITEM/DIR_INDEX and check related INODE_ITEM and
  * call find_inode_ref() to check related INODE_REF/INODE_EXTREF.If repair
  * is enable, do repair and research by @path->nodes[0].
@@ -4941,6 +4995,7 @@ static int check_dir_item(struct btrfs_root *root, struct 
btrfs_key *key,
int ret;
int err = 0;
int tmp_err;
+   int need_research = 0;
 
/*
 * For DIR_ITEM set index to (u64)-1, so that find_inode_ref
@@ -4948,10 +5003,22 @@ static int check_dir_item(struct btrfs_root *root, 
struct btrfs_key *key,
 */
index = (key->type == BTRFS_DIR_INDEX_KEY) ? key->offset : (u64)-1;
 
+research:
+   if (need_research) {
+   ret = research_path(root, path, key);
+   need_research = 0;
+   if (ret)
+   return ret > 0 ? 0 : ret;
+   }
+
+   err = 0;
+   cur = 0;
node = path->nodes[0];
slot = path->slots[0];
+
di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
total = btrfs_item_size_nr(node, slot);
+   memset(namebuf, 0, sizeof(namebuf) / sizeof(*namebuf));
 
while (cur < total) {
data_len = btrfs_dir_data_len(node, di);
@@ -5021,6 +5088,16 @@ static int check_dir_item(struct btrfs_root *root, 
struct btrfs_key *key,
 next:
btrfs_release_path(path);
 
+   if (tmp_err && repair) {
+   ret = repair_dir_item(root, key->objectid,
+ location.objectid, index,
+ imode_to_type(mode), namebuf,
+ name_len, &tmp_err);
+   if (!ret) {
+   need_research = 1;
+   goto research;
+   }
+   }
print_dir_item_err(root, key, location.objectid,
   index, namebuf, name_len, filetype,
   tmp_err);
@@ -5043,6 +5120,7 @@ next:
err |= ret > 0 ? ENOENT : ret;
return err;
 }
+
 /*
  * Check file extent datasum/hole, update the size of the file extents,
  * check and update the last offset of the file extent.
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 20/20] btrfs-progs: fsck-check: Allow fsck check test to repair in lowmem mode for certain test cases

2017-02-28 Thread Su Yue
From: Qu Wenruo 

Since lowmem mode can repair certain corruptions (mostly in the fs tree),
insert a beacon into fsck test cases to allow some of them to be
tested in lowmem mode.

With this patch, the fsck option override will check for the beacon file
".lowmem_repairable" in the same directory as the test image, and if the
beacon exists, it will also run lowmem mode repair to repair the
image.

Signed-off-by: Qu Wenruo 
---
 tests/common.local | 14 +-
 tests/fsck-tests/004-no-dir-index/.lowmem_repairable   |  0
 .../fsck-tests/009-no-dir-item-or-index/.lowmem_repairable |  0
 .../010-no-rootdir-inode-item/.lowmem_repairable   |  0
 tests/fsck-tests/011-no-inode-item/.lowmem_repairable  |  0
 tests/fsck-tests/016-wrong-inode-nbytes/.lowmem_repairable |  0
 .../017-missing-all-file-extent/.lowmem_repairable |  0
 7 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 tests/fsck-tests/004-no-dir-index/.lowmem_repairable
 create mode 100644 tests/fsck-tests/009-no-dir-item-or-index/.lowmem_repairable
 create mode 100644 
tests/fsck-tests/010-no-rootdir-inode-item/.lowmem_repairable
 create mode 100644 tests/fsck-tests/011-no-inode-item/.lowmem_repairable
 create mode 100644 tests/fsck-tests/016-wrong-inode-nbytes/.lowmem_repairable
 create mode 100644 
tests/fsck-tests/017-missing-all-file-extent/.lowmem_repairable

diff --git a/tests/common.local b/tests/common.local
index 4f56bb08..af372f16 100644
--- a/tests/common.local
+++ b/tests/common.local
@@ -15,11 +15,23 @@ TEST_ARGS_CHECK=--mode=lowmem
 # gets arguments of a current command and can decide if the argument insertion
 # should happen, eg. if some option combination does not make sense or would
 # break tests
+#
+# Return 0 if we need to skip option override
+# Return 1 if we don't need to skip option override
 _skip_spec()
 {
+   beacon=.lowmem_repairable
+
+   # Lowmem repair only supports fs tree repair so far,
+   # so we place a lowmem repair beacon in the same dir as the
+   # test case
if echo "$TEST_ARGS_CHECK" | grep -q 'mode=lowmem' &&
   echo "$@" | grep -q -- '--repair'; then
-   return 0
+   dir="$(dirname ${@: -1})"
+   if [ -f ${dir}/${beacon} ]; then
+   return 1;
+   fi
+   return 0;
fi
return 1
 }
diff --git a/tests/fsck-tests/004-no-dir-index/.lowmem_repairable 
b/tests/fsck-tests/004-no-dir-index/.lowmem_repairable
new file mode 100644
index ..e69de29b
diff --git a/tests/fsck-tests/009-no-dir-item-or-index/.lowmem_repairable 
b/tests/fsck-tests/009-no-dir-item-or-index/.lowmem_repairable
new file mode 100644
index ..e69de29b
diff --git a/tests/fsck-tests/010-no-rootdir-inode-item/.lowmem_repairable 
b/tests/fsck-tests/010-no-rootdir-inode-item/.lowmem_repairable
new file mode 100644
index ..e69de29b
diff --git a/tests/fsck-tests/011-no-inode-item/.lowmem_repairable 
b/tests/fsck-tests/011-no-inode-item/.lowmem_repairable
new file mode 100644
index ..e69de29b
diff --git a/tests/fsck-tests/016-wrong-inode-nbytes/.lowmem_repairable 
b/tests/fsck-tests/016-wrong-inode-nbytes/.lowmem_repairable
new file mode 100644
index ..e69de29b
diff --git a/tests/fsck-tests/017-missing-all-file-extent/.lowmem_repairable 
b/tests/fsck-tests/017-missing-all-file-extent/.lowmem_repairable
new file mode 100644
index ..e69de29b
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/20] btrfs-progs: cmds-check.c: change find_dir_index/item

2017-02-28 Thread Su Yue
For further lowmem_repair, introduce 'find_dir_index' to
get the index by other inode item information.
Remove 'check_dir_item' error msg print.

Adjust 'find_dir_item' args and remove err msg print.

Signed-off-by: Su Yue 
---
 cmds-check.c | 231 +++
 1 file changed, 155 insertions(+), 76 deletions(-)

diff --git a/cmds-check.c b/cmds-check.c
index 892a22ba..c45dfae4 100644
--- a/cmds-check.c
+++ b/cmds-check.c
@@ -130,6 +130,8 @@ struct data_backref {
 #define LAST_ITEM  (1<<15) /* Complete this tree traversal */
 #define ROOT_REF_MISSING   (1<<16) /* ROOT_REF not found */
 #define ROOT_REF_MISMATCH  (1<<17) /* ROOT_REF found but not match */
+#define DIR_INDEX_MISSING   (1<<18) /* DIR_INDEX not found */
+#define DIR_INDEX_MISMATCH  (1<<19) /* DIR_INDEX found but not match */
 
 static inline struct data_backref* to_data_backref(struct extent_backref *back)
 {
@@ -4133,29 +4135,30 @@ out:
return err;
 }
 
+static int find_dir_index(struct btrfs_root *root, u64 dirid, u64 location_id,
+ u64 *index_ret, char *namebuf, u32 name_len,
+ u8 file_type);
 /*
  * Find DIR_ITEM/DIR_INDEX for the given key and check it with the specified
  * INODE_REF/INODE_EXTREF match.
  *
  * @root:  the root of the fs/file tree
- * @ref_key:   the key of the INODE_REF/INODE_EXTREF
- * @key:   the key of the DIR_ITEM/DIR_INDEX
- * @index: the index in the INODE_REF/INODE_EXTREF, be used to
- * distinguish root_dir between normal dir/file
- * @name:  the name in the INODE_REF/INODE_EXTREF
- * @namelen:   the length of name in the INODE_REF/INODE_EXTREF
- * @mode:  the st_mode of INODE_ITEM
+ * @key:   the key of the DIR_ITEM/DIR_INDEX; key->offset is set to the
+ *  right value when searching for the index
+ * @location_key: location key of the struct btrfs_dir_item to match
+ * @name:  the name to match
+ * @namelen:   the length of name
+ * @file_type: the type of file to match
  *
  * Return 0 if no error occurred.
- * Return ROOT_DIR_ERROR if found DIR_ITEM/DIR_INDEX for root_dir.
- * Return DIR_ITEM_MISSING if couldn't find DIR_ITEM/DIR_INDEX for normal
- * dir/file.
- * Return DIR_ITEM_MISMATCH if INODE_REF/INODE_EXTREF and DIR_ITEM/DIR_INDEX
- * not match for normal dir/file.
+ * Return DIR_ITEM_MISSING/DIR_INDEX_MISSING if couldn't find
+ * DIR_ITEM/DIR_INDEX
+ * Return DIR_ITEM_MISMATCH/DIR_INDEX_MISMATCH if INODE_REF/INODE_EXTREF
+ * and DIR_ITEM/DIR_INDEX mismatch
  */
-static int find_dir_item(struct btrfs_root *root, struct btrfs_key *ref_key,
-struct btrfs_key *key, u64 index, char *name,
-u32 namelen, u32 mode)
+static int find_dir_item(struct btrfs_root *root, struct btrfs_key *key,
+struct btrfs_key *location_key, char *name,
+u32 namelen, u8 file_type)
 {
struct btrfs_path path;
struct extent_buffer *node;
@@ -4165,104 +4168,166 @@ static int find_dir_item(struct btrfs_root *root, 
struct btrfs_key *ref_key,
u32 total;
u32 cur = 0;
u32 len;
-   u32 name_len;
u32 data_len;
u8 filetype;
int slot;
int ret;
 
+   /* get the index by traversing all index */
+   if (key->type == BTRFS_DIR_INDEX_KEY && key->offset == (u64)-1) {
+   ret = find_dir_index(root, key->objectid,
+location_key->objectid, &key->offset,
+name, namelen, file_type);
+   if (ret)
+   ret = DIR_INDEX_MISSING;
+   return ret;
+   }
+
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, root, key, &path, 0, 0);
-   if (ret < 0) {
-   ret = DIR_ITEM_MISSING;
+   if (ret) {
+   ret = key->type == BTRFS_DIR_ITEM_KEY ? DIR_ITEM_MISSING :
+   DIR_INDEX_MISSING;
goto out;
}
 
-   /* Process root dir and goto out*/
-   if (index == 0) {
-   if (ret == 0) {
-   ret = ROOT_DIR_ERROR;
-   error(
-   "root %llu INODE %s[%llu %llu] ROOT_DIR shouldn't have 
%s",
-   root->objectid,
-   ref_key->type == BTRFS_INODE_REF_KEY ?
-   "REF" : "EXTREF",
-   ref_key->objectid, ref_key->offset,
-   key->type == BTRFS_DIR_ITEM_KEY ?
-   "DIR_ITEM" : "DIR_INDEX");
-   } else {
-   ret = 0;
-   }
+   /* Check whether inode_id/filetype/name match */
+   node = path.nodes[0];
+   slot = path.slots[0];
+   di = btrfs_item_ptr(node, slot, struct btrfs_dir_item);
+   total = btrfs_item_si

[PATCH 1/2] Btrfs: fix unexpected file hole after disk errors

2017-02-28 Thread Liu Bo
Btrfs creates hole extents to cover any unwritten section right before
doing buffer writes after commit 3ac0d7b96a26 ("btrfs: Change the expanding
write sequence to fix snapshot related bug.").

However, that takes the start position of the buffered write to compare
against the current EOF, hole extents would be created only if (EOF <
start).

If the EOF is at the middle of the buffered write, no hole extents will be
created and a file hole without a hole extent is left in this file.

This bug was revealed by generic/019 in fstests.  'fsstress' in this test
may create the above situation and the test then fails all requests
including writes, so the buffer write which is supposed to cover the
hole (without the hole extent) couldn't make it on disk.  Running fsck
against such btrfs ends up with detecting file extent holes.

Things could be more serious, some stale data would be exposed to
userspace if files with this kind of hole are truncated to a position of
the hole, because the on-disk inode size is beyond the last extent in the
file.

This fixes the bug by comparing the end position against the EOF.

Signed-off-by: Liu Bo 
---
 fs/btrfs/file.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b5c5da2..0be837b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1861,11 +1861,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
pos = iocb->ki_pos;
count = iov_iter_count(from);
start_pos = round_down(pos, fs_info->sectorsize);
+   end_pos = round_up(pos + count, fs_info->sectorsize);
oldsize = i_size_read(inode);
-   if (start_pos > oldsize) {
+   if (end_pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
-   end_pos = round_up(pos + count,
-  fs_info->sectorsize);
err = btrfs_cont_expand(inode, oldsize, end_pos);
if (err) {
inode_unlock(inode);
-- 
2.5.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/7] Btrfs: create a helper for getting chunk map

2017-02-28 Thread Liu Bo
On Mon, Feb 20, 2017 at 11:20:33AM +0800, Qu Wenruo wrote:
> 
> 
> At 02/18/2017 09:28 AM, Liu Bo wrote:
> > We have similar code here and there, this merges them into a helper.
> > 
> > Signed-off-by: Liu Bo 
> 
> Looks good overall.
> 
> Although small nitpick inlined below.

Thank you for going through this.

> > ---
> >  fs/btrfs/extent_io.c |   3 +-
> >  fs/btrfs/volumes.c   | 163 
> > +--
> >  fs/btrfs/volumes.h   |   2 +-
> >  3 files changed, 57 insertions(+), 111 deletions(-)
> > 
> > diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
> > index 4ac383a..609ece1 100644
> > --- a/fs/btrfs/extent_io.c
> > +++ b/fs/btrfs/extent_io.c
> > @@ -2007,14 +2007,13 @@ int repair_io_failure(struct inode *inode, u64 
> > start, u64 length, u64 logical,
> > u64 map_length = 0;
> > u64 sector;
> > struct btrfs_bio *bbio = NULL;
> > -   struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
> > int ret;
> > 
> > ASSERT(!(fs_info->sb->s_flags & MS_RDONLY));
> > BUG_ON(!mirror_num);
> > 
> > /* we can't repair anything in raid56 yet */
> > -   if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
> > +   if (btrfs_is_parity_mirror(fs_info, logical, length, mirror_num))
> 
> Not sure if such small parameter cleanup can be split into a separate patch.
> At least it's less related to the get_chunk_map() helper.
>

But it's not a cleanup, it is get_chunk_map() that needs @fs_info.

> > return 0;
> > 
> > bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
> > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
> > index 3c3c69c..c52b0fe 100644
> > --- a/fs/btrfs/volumes.c
> > +++ b/fs/btrfs/volumes.c
> > @@ -2794,10 +2794,38 @@ static int btrfs_del_sys_chunk(struct btrfs_fs_info 
> > *fs_info,
> > return ret;
> >  }
> > 
> > +static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
> > +   u64 logical, u64 length)
> > +{
> > +   struct extent_map_tree *em_tree;
> > +   struct extent_map *em;
> > +
> > +   em_tree = &fs_info->mapping_tree.map_tree;
> > +   read_lock(&em_tree->lock);
> > +   em = lookup_extent_mapping(em_tree, logical, length);
> > +   read_unlock(&em_tree->lock);
> > +
> > +   if (!em) {
> > +   btrfs_crit(fs_info, "unable to find logical %llu len %llu",
> > +   logical, length);
> 
> Nice error message, would be quite helpful when we hit some bug later.
> 
> > +   return ERR_PTR(-EINVAL);
> 
> Normally I'd return -ENOENT, not sure what's the correct return here though.
>

So I tried to be consistent with the error handling of other places of searching
chunk mapping tree.

I think EINVAL makes sense here, either @logical or @length is not valid.

> > +   }
> > +
> > +   if (em->start > logical || em->start + em->len < logical) {
> > +   btrfs_crit(fs_info,
> > +  "found a bad mapping, wanted %llu, found %llu-%llu",
> > +  logical, em->start, em->start + em->len);
> 
> Better outputting @length also.
>

OK, I'll update it with @length.

Thanks,

-liubo

> Thanks,
> Qu
> 
> > +   free_extent_map(em);
> > +   return ERR_PTR(-EINVAL);
> > +   }
> > +
> > +   /* callers are responsible for dropping em's ref. */
> > +   return em;
> > +}
> > +
> >  int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
> >struct btrfs_fs_info *fs_info, u64 chunk_offset)
> >  {
> > -   struct extent_map_tree *em_tree;
> > struct extent_map *em;
> > struct map_lookup *map;
> > u64 dev_extent_len = 0;
> > @@ -2805,23 +2833,15 @@ int btrfs_remove_chunk(struct btrfs_trans_handle 
> > *trans,
> > int i, ret = 0;
> > struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
> > 
> > -   em_tree = &fs_info->mapping_tree.map_tree;
> > -
> > -   read_lock(&em_tree->lock);
> > -   em = lookup_extent_mapping(em_tree, chunk_offset, 1);
> > -   read_unlock(&em_tree->lock);
> > -
> > -   if (!em || em->start > chunk_offset ||
> > -   em->start + em->len < chunk_offset) {
> > +   em = get_chunk_map(fs_info, chunk_offset, 1);
> > +   if (IS_ERR(em)) {
> > /*
> >  * This is a logic error, but we don't want to just rely on the
> >  * user having built with ASSERT enabled, so if ASSERT doesn't
> >  * do anything we still error out.
> >  */
> > ASSERT(0);
> > -   if (em)
> > -   free_extent_map(em);
> > -   return -EINVAL;
> > +   return PTR_ERR(em);
> > }
> > map = em->map_lookup;
> > mutex_lock(&fs_info->chunk_mutex);
> > @@ -4888,7 +4908,6 @@ int btrfs_finish_chunk_alloc(struct 
> > btrfs_trans_handle *trans,
> > struct btrfs_device *device;
> > struct btrfs_chunk *chunk;
> > struct btrfs_stripe *stripe;
> > -   struct extent_map_tree *em_tree;
> > struct extent_map *em;
> > struct map_lookup *map;
> > siz

[PATCH 2/2] Btrfs: remove start_pos

2017-02-28 Thread Liu Bo
@pos, not aligned @start_pos, should be used to check whether the eof page
needs to be marked as readonly, thus @start_pos can be removed.

Signed-off-by: Liu Bo 
---
 fs/btrfs/file.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0be837b..ef88e6d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1814,7 +1814,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
-   u64 start_pos;
u64 end_pos;
ssize_t num_written = 0;
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
@@ -1822,7 +1821,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
loff_t pos;
size_t count;
loff_t oldsize;
-   int clean_page = 0;
 
inode_lock(inode);
err = generic_write_checks(iocb, from);
@@ -1860,7 +1858,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 
pos = iocb->ki_pos;
count = iov_iter_count(from);
-   start_pos = round_down(pos, fs_info->sectorsize);
end_pos = round_up(pos + count, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (end_pos > oldsize) {
@@ -1870,8 +1867,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
inode_unlock(inode);
goto out;
}
-   if (start_pos > round_up(oldsize, fs_info->sectorsize))
-   clean_page = 1;
}
 
if (sync)
@@ -1883,7 +1878,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
num_written = __btrfs_buffered_write(file, from, pos);
if (num_written > 0)
iocb->ki_pos = pos + num_written;
-   if (clean_page)
+   if (oldsize < pos)
pagecache_isize_extended(inode, oldsize,
i_size_read(inode));
}
-- 
2.5.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: assertion failed: last_size == new_size, file: fs/btrfs/inode.c

2017-02-28 Thread Liu Bo
On Mon, Feb 27, 2017 at 11:23:42AM -0500, Dave Jones wrote:
> On Mon, Feb 27, 2017 at 07:53:48AM -0800, Liu Bo wrote:
>  > On Sun, Feb 26, 2017 at 07:18:42PM -0500, Dave Jones wrote:
>  > > Hitting this fairly frequently.. I'm not sure if this is the same bug 
> I've
>  > > been hitting occasionally since 4.9. The assertion looks new to me at 
> least.
>  > >
>  > 
>  > It was recently introduced by my commit and used to catch data loss at 
> truncate.
>  > 
>  > Were you running the test with a mkfs.btrfs -O NO_HOLES?
>  > (We just queued a fix for the NO_HOLES case in btrfs-next.)
> 
> No, a fs created with default mkfs.btrfs options.

I have this patch[1] to fix a bug which results in file hole extent, and this
bug could lead us to hit the assertion.

Would you try to run the test w/ it, please?

[1]: https://patchwork.kernel.org/patch/9597281/

Thanks,

-liubo
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] Btrfs: remove start_pos

2017-02-28 Thread Liu Bo
@pos, not aligned @start_pos, should be used to check whether the eof page
needs to be marked as readonly, thus @start_pos can be removed.

Signed-off-by: Liu Bo 
---
 fs/btrfs/file.c | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0be837b..ef88e6d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1814,7 +1814,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
-   u64 start_pos;
u64 end_pos;
ssize_t num_written = 0;
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
@@ -1822,7 +1821,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
loff_t pos;
size_t count;
loff_t oldsize;
-   int clean_page = 0;
 
inode_lock(inode);
err = generic_write_checks(iocb, from);
@@ -1860,7 +1858,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 
pos = iocb->ki_pos;
count = iov_iter_count(from);
-   start_pos = round_down(pos, fs_info->sectorsize);
end_pos = round_up(pos + count, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (end_pos > oldsize) {
@@ -1870,8 +1867,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
inode_unlock(inode);
goto out;
}
-   if (start_pos > round_up(oldsize, fs_info->sectorsize))
-   clean_page = 1;
}
 
if (sync)
@@ -1883,7 +1878,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
num_written = __btrfs_buffered_write(file, from, pos);
if (num_written > 0)
iocb->ki_pos = pos + num_written;
-   if (clean_page)
+   if (oldsize < pos)
pagecache_isize_extended(inode, oldsize,
i_size_read(inode));
}
-- 
2.5.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] Btrfs: fix unexpected file hole after disk errors

2017-02-28 Thread Liu Bo
Btrfs creates hole extents to cover any unwritten section right before
doing buffer writes after commit 3ac0d7b96a26 ("btrfs: Change the expanding
write sequence to fix snapshot related bug.").

However, that takes the start position of the buffered write to compare
against the current EOF, hole extents would be created only if (EOF <
start).

If the EOF is at the middle of the buffered write, no hole extents will be
created and a file hole without a hole extent is left in this file.

This bug was revealed by generic/019 in fstests.  'fsstress' in this test
may create the above situation and the test then fails all requests
including writes, so the buffer write which is supposed to cover the
hole (without the hole extent) couldn't make it on disk.  Running fsck
against such btrfs ends up with detecting file extent holes.

Things could be more serious, some stale data would be exposed to
userspace if files with this kind of hole are truncated to a position of
the hole, because the on-disk inode size is beyond the last extent in the
file.

This fixes the bug by comparing the end position against the EOF.

Signed-off-by: Liu Bo 
---
 fs/btrfs/file.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b5c5da2..0be837b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1861,11 +1861,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
pos = iocb->ki_pos;
count = iov_iter_count(from);
start_pos = round_down(pos, fs_info->sectorsize);
+   end_pos = round_up(pos + count, fs_info->sectorsize);
oldsize = i_size_read(inode);
-   if (start_pos > oldsize) {
+   if (end_pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
-   end_pos = round_up(pos + count,
-  fs_info->sectorsize);
err = btrfs_cont_expand(inode, oldsize, end_pos);
if (err) {
inode_unlock(inode);
-- 
2.5.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 8/8] nowait aio: btrfs

2017-02-28 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

Return EAGAIN if any of the following checks fail
 + i_rwsem is not lockable
 + NODATACOW or PREALLOC is not set
 + Cannot nocow at the desired location
 + Writing beyond end of file which is not allocated

Signed-off-by: Goldwyn Rodrigues 
---
 fs/btrfs/file.c  | 25 -
 fs/btrfs/inode.c |  3 +++
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b5c5da2..8640280 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1819,12 +1819,29 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
ssize_t num_written = 0;
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
ssize_t err;
-   loff_t pos;
-   size_t count;
+   loff_t pos = iocb->ki_pos;
+   size_t count = iov_iter_count(from);
loff_t oldsize;
int clean_page = 0;
 
-   inode_lock(inode);
+   if ((iocb->ki_flags & IOCB_NOWAIT) &&
+   (iocb->ki_flags & IOCB_DIRECT)) {
+   /* Don't sleep on inode rwsem */
+   if (!inode_trylock(inode))
+   return -EAGAIN;
+   /*
+* We will allocate space in case nodatacow is not set,
+* so bail
+*/
+   if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) ||
+   check_can_nocow(inode, pos, &count) <= 0) {
+   inode_unlock(inode);
+   return -EAGAIN;
+   }
+   } else
+   inode_lock(inode);
+
err = generic_write_checks(iocb, from);
if (err <= 0) {
inode_unlock(inode);
@@ -1858,8 +1875,6 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
 */
update_time_for_write(inode);
 
-   pos = iocb->ki_pos;
-   count = iov_iter_count(from);
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
if (start_pos > oldsize) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1e861a0..c5041ea 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -8681,6 +8681,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct 
iov_iter *iter)
if (offset + count <= inode->i_size) {
inode_unlock(inode);
relock = true;
+   } else if (iocb->ki_flags & IOCB_NOWAIT) {
+   ret = -EAGAIN;
+   goto out;
}
ret = btrfs_delalloc_reserve_space(inode, offset, count);
if (ret)
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/8] nowait aio: Introduce IOMAP_NOWAIT

2017-02-28 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

IOCB_NOWAIT translates to IOMAP_NOWAIT for iomaps.
This is used by XFS in the XFS patch.

Signed-off-by: Goldwyn Rodrigues 
---
 fs/iomap.c| 2 ++
 include/linux/iomap.h | 1 +
 2 files changed, 3 insertions(+)

diff --git a/fs/iomap.c b/fs/iomap.c
index a51cb4c..3fb68d2 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -883,6 +883,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, 
struct iomap_ops *ops,
} else {
dio->flags |= IOMAP_DIO_WRITE;
flags |= IOMAP_WRITE;
+   if (iocb->ki_flags & IOCB_NOWAIT)
+   flags |= IOMAP_NOWAIT;
}
 
if (mapping->nrpages) {
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index a4c94b8..d1c33ef 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -51,6 +51,7 @@ struct iomap {
 #define IOMAP_REPORT   (1 << 2) /* report extent status, e.g. FIEMAP */
 #define IOMAP_FAULT(1 << 3) /* mapping for page fault */
 #define IOMAP_DIRECT   (1 << 4) /* direct I/O */
+#define IOMAP_NOWAIT   (1 << 5) /* Don't wait for writeback */
 
 struct iomap_ops {
/*
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/8] nowait aio: return if direct write will trigger writeback

2017-02-28 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

Find out if the write will trigger a wait due to writeback. If yes,
return -EAGAIN.

This introduces a new function filemap_range_has_page() which
returns true if the file's mapping has a page within the range
mentioned.

Return -EINVAL for buffered AIO: there are multiple causes of
delay such as page locks, dirty throttling logic, page loading
from disk etc. which cannot be taken care of.

Signed-off-by: Goldwyn Rodrigues 
---
 include/linux/fs.h |  2 ++
 mm/filemap.c   | 50 +++---
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/include/linux/fs.h b/include/linux/fs.h
index ab2f556..527ef53 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2494,6 +2494,8 @@ extern int filemap_fdatawait(struct address_space *);
 extern void filemap_fdatawait_keep_errors(struct address_space *);
 extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
   loff_t lend);
+extern int filemap_range_has_page(struct address_space *, loff_t lstart,
+  loff_t lend);
 extern int filemap_write_and_wait(struct address_space *mapping);
 extern int filemap_write_and_wait_range(struct address_space *mapping,
loff_t lstart, loff_t lend);
diff --git a/mm/filemap.c b/mm/filemap.c
index 78dd50e..82335f4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -375,6 +375,39 @@ int filemap_flush(struct address_space *mapping)
 }
 EXPORT_SYMBOL(filemap_flush);
 
+/**
+ * filemap_range_has_page - check if a page exists in range.
+ * @mapping:   address space structure to look for pages in
+ * @start_byte:offset in bytes where the range starts
+ * @end_byte:  offset in bytes where the range ends (inclusive)
+ *
+ * Find at least one page in the range supplied, usually used to check if
+ * direct writing in this range will trigger a writeback.
+ */
+int filemap_range_has_page(struct address_space *mapping,
+   loff_t start_byte, loff_t end_byte)
+{
+   pgoff_t index = start_byte >> PAGE_SHIFT;
+   pgoff_t end = end_byte >> PAGE_SHIFT;
+   struct pagevec pvec;
+   int ret;
+
+   if (end_byte < start_byte)
+   return 0;
+
+   if (mapping->nrpages == 0)
+   return 0;
+
+   pagevec_init(&pvec, 0);
+   ret = pagevec_lookup(&pvec, mapping, index, 1);
+   if (!ret)
+   return 0;
+   ret = (pvec.pages[0]->index <= end);
+   pagevec_release(&pvec);
+   return ret;
+}
+EXPORT_SYMBOL(filemap_range_has_page);
+
 static int __filemap_fdatawait_range(struct address_space *mapping,
 loff_t start_byte, loff_t end_byte)
 {
@@ -2631,6 +2664,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, 
struct iov_iter *from)
 
pos = iocb->ki_pos;
 
+   if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+   return -EINVAL;
+
if (limit != RLIM_INFINITY) {
if (iocb->ki_pos >= limit) {
send_sig(SIGXFSZ, current, 0);
@@ -2700,9 +2736,17 @@ generic_file_direct_write(struct kiocb *iocb, struct 
iov_iter *from)
write_len = iov_iter_count(from);
end = (pos + write_len - 1) >> PAGE_SHIFT;
 
-   written = filemap_write_and_wait_range(mapping, pos, pos + write_len - 
1);
-   if (written)
-   goto out;
+   if (iocb->ki_flags & IOCB_NOWAIT) {
+   /* If there are pages to writeback, return */
+   if (filemap_range_has_page(inode->i_mapping, pos,
+  pos + iov_iter_count(from)))
+   return -EAGAIN;
+   } else {
+   written = filemap_write_and_wait_range(mapping, pos,
+   pos + write_len - 1);
+   if (written)
+   goto out;
+   }
 
/*
 * After a write we want buffered reads to be sure to go to disk to get
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/8] nowait aio: xfs

2017-02-28 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

If IOCB_NOWAIT is set, bail if the i_rwsem is not lockable
immediately.

If IOMAP_NOWAIT is set, return EAGAIN in xfs_file_iomap_begin
if it needs allocation due to extending the file, writing to a hole,
or COW.

Signed-off-by: Goldwyn Rodrigues 
---
 fs/xfs/xfs_file.c  | 9 +++--
 fs/xfs/xfs_iomap.c | 9 +
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index bbb9eb6..7e16a83 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -528,12 +528,17 @@ xfs_file_dio_aio_write(
((iocb->ki_pos + count) & mp->m_blockmask)) {
unaligned_io = 1;
iolock = XFS_IOLOCK_EXCL;
+   if (iocb->ki_flags & IOCB_NOWAIT)
+   return -EAGAIN;
} else {
iolock = XFS_IOLOCK_SHARED;
}
 
-   xfs_ilock(ip, iolock);
-
+   if (!xfs_ilock_nowait(ip, iolock)) {
+   if (iocb->ki_flags & IOCB_NOWAIT)
+   return -EAGAIN;
+   xfs_ilock(ip, iolock);
+   }
ret = xfs_file_aio_write_checks(iocb, from, &iolock);
if (ret)
goto out;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 1aa3abd..84f981a 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1020,6 +1020,11 @@ xfs_file_iomap_begin(
if ((flags & IOMAP_REPORT) ||
(xfs_is_reflink_inode(ip) &&
 (flags & IOMAP_WRITE) && (flags & IOMAP_DIRECT))) {
+   /* Allocations due to reflinks */
+   if ((flags & IOMAP_NOWAIT) && !(flags & IOMAP_REPORT)) {
+   error = -EAGAIN;
+   goto out_unlock;
+   }
/* Trim the mapping to the nearest shared extent boundary. */
error = xfs_reflink_trim_around_shared(ip, &imap, &shared,
&trimmed);
@@ -1049,6 +1054,10 @@ xfs_file_iomap_begin(
}
 
if ((flags & IOMAP_WRITE) && imap_needs_alloc(inode, &imap, nimaps)) {
+   if (flags & IOMAP_NOWAIT) {
+   error = -EAGAIN;
+   goto out_unlock;
+   }
/*
 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
 * pages to keep the chunks of work done where somewhat 
symmetric
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/8 v2] Non-blocking AIO

2017-02-28 Thread Goldwyn Rodrigues
This series adds nonblocking feature to asynchronous I/O writes.
io_submit() can be delayed because of a number of reasons:
 - Block allocation for files
 - Data writebacks for direct I/O
 - Sleeping because of waiting to acquire i_rwsem
 - Congested block device

The goal of the patch series is to return -EAGAIN/-EWOULDBLOCK if
any of these conditions are met. This way userspace can push most
of the write()s to the kernel to the best of its ability to complete
and if it returns -EAGAIN, can defer it to another thread.

In order to enable this, IOCB_FLAG_NOWAIT is introduced in
uapi/linux/aio_abi.h which translates to IOCB_NOWAIT for struct iocb,
BIO_NOWAIT for bio and IOMAP_NOWAIT for iomap.

This feature is provided for direct I/O of asynchronous I/O only. I have
tested it against xfs, ext4, and btrfs.

Changes since v1:
 + Forward-ported from 4.9.10
 + changed name from _NONBLOCKING to *_NOWAIT
 + filemap_range_has_page call moved to closer to (just before) calling 
filemap_write_and_wait_range().
 + BIO_NOWAIT limited to get_request()
 + XFS fixes 
- included reflink 
- use of xfs_ilock_nowait() instead of a XFS_IOLOCK_NONBLOCKING flag
- Translate the flag through IOMAP_NOWAIT (iomap) to check for
  block allocation for the file.
 + ext4 coding style
-- 
Goldwyn


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/8] nowait aio: Return if cannot get hold of i_rwsem

2017-02-28 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

A failure to lock i_rwsem would mean there is I/O being performed
by another thread. So, let's bail.

Signed-off-by: Goldwyn Rodrigues 
---
 mm/filemap.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 3f9afde..78dd50e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2973,7 +2973,12 @@ ssize_t generic_file_write_iter(struct kiocb *iocb, 
struct iov_iter *from)
struct inode *inode = file->f_mapping->host;
ssize_t ret;
 
-   inode_lock(inode);
+   if (!inode_trylock(inode)) {
+   /* Don't sleep on inode rwsem */
+   if (iocb->ki_flags & IOCB_NOWAIT)
+   return -EAGAIN;
+   inode_lock(inode);
+   }
ret = generic_write_checks(iocb, from);
if (ret > 0)
ret = __generic_file_write_iter(iocb, from);
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 5/8] nowait aio: return on congested block device

2017-02-28 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

A new flag BIO_NOWAIT is introduced to identify bio's
originating from iocb with IOCB_NOWAIT. This flag indicates
to return immediately if a request cannot be made instead
of retrying.

Signed-off-by: Goldwyn Rodrigues 
---
 block/blk-core.c  | 13 +++--
 fs/direct-io.c| 11 +--
 include/linux/blk_types.h |  1 +
 3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 61ba08c..e5cfc50 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1258,6 +1258,11 @@ static struct request *get_request(struct request_queue 
*q, unsigned int op,
if (!IS_ERR(rq))
return rq;
 
+   if (bio_flagged(bio, BIO_NOWAIT)) {
+   blk_put_rl(rl);
+   return ERR_PTR(-EAGAIN);
+   }
+
if (!gfpflags_allow_blocking(gfp_mask) || unlikely(blk_queue_dying(q))) 
{
blk_put_rl(rl);
return rq;
@@ -2018,7 +2023,7 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
 
-   if (likely(blk_queue_enter(q, false) == 0)) {
+   if (likely(blk_queue_enter(q, bio_flagged(bio, BIO_NOWAIT)) == 
0)) {
ret = q->make_request_fn(q, bio);
 
blk_queue_exit(q);
@@ -2027,7 +2032,11 @@ blk_qc_t generic_make_request(struct bio *bio)
} else {
struct bio *bio_next = bio_list_pop(current->bio_list);
 
-   bio_io_error(bio);
+   if (unlikely(bio_flagged(bio, BIO_NOWAIT))) {
+   bio->bi_error = -EAGAIN;
+   bio_endio(bio);
+   } else
+   bio_io_error(bio);
bio = bio_next;
}
} while (bio);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index c87bae4..2973df0 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -386,6 +386,9 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
else
bio->bi_end_io = dio_bio_end_io;
 
+   if (dio->iocb->ki_flags & IOCB_NOWAIT)
+   bio_set_flag(bio, BIO_NOWAIT);
+
sdio->bio = bio;
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
 }
@@ -480,8 +483,12 @@ static int dio_bio_complete(struct dio *dio, struct bio 
*bio)
unsigned i;
int err;
 
-   if (bio->bi_error)
-   dio->io_error = -EIO;
+   if (bio->bi_error) {
+   if (bio_flagged(bio, BIO_NOWAIT))
+   dio->io_error = bio->bi_error;
+   else
+   dio->io_error = -EIO;
+   }
 
if (dio->is_async && dio->op == REQ_OP_READ && dio->should_dirty) {
err = bio->bi_error;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 519ea2c..1d77e9b 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -102,6 +102,7 @@ struct bio {
 #define BIO_REFFED 8   /* bio has elevated ->bi_cnt */
 #define BIO_THROTTLED  9   /* This bio has already been subjected to
 * throttling rules. Don't do it again. */
+#define BIO_NOWAIT 10  /* don't block over blk device congestion */
 
 /*
  * Flags starting here get preserved by bio_reset() - this includes
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 6/8] nowait aio: ext4

2017-02-28 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

Return EAGAIN if any of the following conditions hold for direct I/O:
 + i_rwsem is not immediately lockable
 + Writing beyond end of file (will trigger allocation)
 + Blocks are not allocated at the write location

Signed-off-by: Goldwyn Rodrigues 
---
 fs/ext4/file.c | 53 +++--
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d663d3d..391e03b 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -124,27 +124,22 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter 
*from, loff_t pos)
return 0;
 }
 
-/* Is IO overwriting allocated and initialized blocks? */
-static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
+/* Are IO blocks allocated */
+static bool ext4_blocks_mapped(struct inode *inode, loff_t pos, loff_t len,
+   struct ext4_map_blocks *map)
 {
-   struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits;
int err, blklen;
 
if (pos + len > i_size_read(inode))
return false;
 
-   map.m_lblk = pos >> blkbits;
-   map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
-   blklen = map.m_len;
+   map->m_lblk = pos >> blkbits;
+   map->m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
+   blklen = map->m_len;
 
-   err = ext4_map_blocks(NULL, inode, &map, 0);
-   /*
-* 'err==len' means that all of the blocks have been preallocated,
-* regardless of whether they have been initialized or not. To exclude
-* unwritten extents, we need to check m_flags.
-*/
-   return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
+   err = ext4_map_blocks(NULL, inode, map, 0);
+   return err == blklen;
 }
 
 static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
@@ -176,6 +171,7 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter 
*from)
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
bool overwrite = false;
+   struct ext4_map_blocks map;
 
inode_lock(inode);
ret = ext4_write_checks(iocb, from);
@@ -188,7 +184,9 @@ ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter 
*from)
if (ret)
goto out;
 
-   if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+   if (ext4_blocks_mapped(inode, iocb->ki_pos,
+   iov_iter_count(from), &map) &&
+   (map.m_flags & EXT4_MAP_MAPPED)) {
overwrite = true;
downgrade_write(&inode->i_rwsem);
}
@@ -209,6 +207,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter 
*from)
 {
struct inode *inode = file_inode(iocb->ki_filp);
int o_direct = iocb->ki_flags & IOCB_DIRECT;
+   int nowait = iocb->ki_flags & IOCB_NOWAIT;
int unaligned_aio = 0;
int overwrite = 0;
ssize_t ret;
@@ -218,7 +217,13 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter 
*from)
return ext4_dax_write_iter(iocb, from);
 #endif
 
-   inode_lock(inode);
+   if (o_direct && nowait) {
+   if (!inode_trylock(inode))
+   return -EAGAIN;
+   } else {
+   inode_lock(inode);
+   }
+
ret = ext4_write_checks(iocb, from);
if (ret <= 0)
goto out;
@@ -237,9 +242,21 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter 
*from)
 
iocb->private = &overwrite;
/* Check whether we do a DIO overwrite or not */
-   if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
-   ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
-   overwrite = 1;
+   if (o_direct && !unaligned_aio) {
+   struct ext4_map_blocks map;
+   if (ext4_blocks_mapped(inode, iocb->ki_pos,
+ iov_iter_count(from), &map)) {
+   /* To exclude unwritten extents, we need to check
+* m_flags.
+*/
+   if (ext4_should_dioread_nolock(inode) &&
+   (map.m_flags & EXT4_MAP_MAPPED))
+   overwrite = 1;
+   } else if (iocb->ki_flags & IOCB_NOWAIT) {
+   ret = -EAGAIN;
+   goto out;
+   }
+   }
 
ret = __generic_file_write_iter(iocb, from);
inode_unlock(inode);
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/8] nowait aio: Introduce IOCB_FLAG_NOWAIT

2017-02-28 Thread Goldwyn Rodrigues
From: Goldwyn Rodrigues 

This flag informs kernel to bail out if an AIO request will block
for reasons such as file allocations, or a writeback triggered,
or would block while allocating requests while performing
direct I/O.

IOCB_FLAG_NOWAIT is translated to IOCB_NOWAIT for
iocb->ki_flags.

Signed-off-by: Goldwyn Rodrigues 
---
 fs/aio.c | 3 +++
 include/linux/fs.h   | 1 +
 include/uapi/linux/aio_abi.h | 3 +++
 3 files changed, 7 insertions(+)

diff --git a/fs/aio.c b/fs/aio.c
index 873b4ca..5ae19ba 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1586,6 +1586,9 @@ static int io_submit_one(struct kioctx *ctx, struct iocb 
__user *user_iocb,
req->common.ki_flags |= IOCB_EVENTFD;
}
 
+   if (iocb->aio_flags & IOCB_FLAG_NOWAIT)
+   req->common.ki_flags |= IOCB_NOWAIT;
+
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
if (unlikely(ret)) {
pr_debug("EFAULT: aio_key\n");
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2ba0743..ab2f556 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -270,6 +270,7 @@ struct writeback_control;
 #define IOCB_DSYNC (1 << 4)
 #define IOCB_SYNC  (1 << 5)
 #define IOCB_WRITE (1 << 6)
+#define IOCB_NOWAIT(1 << 7)
 
 struct kiocb {
struct file *ki_filp;
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index bb2554f..82d1d94 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -51,8 +51,11 @@ enum {
  *
  * IOCB_FLAG_RESFD - Set if the "aio_resfd" member of the "struct iocb"
  *   is valid.
+ * IOCB_FLAG_NOWAIT - Set if the user wants the iocb to fail if it would block
+ * for operations such as disk allocation.
  */
 #define IOCB_FLAG_RESFD(1 << 0)
+#define IOCB_FLAG_NOWAIT   (1 << 1)
 
 /* read() from /dev/aio returns these structures. */
 struct io_event {
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PULL] Btrfs cleanups for 4.11, part 2

2017-02-28 Thread Chris Mason



On 02/28/2017 10:09 AM, David Sterba wrote:

Hi,

this is the second half of the 4.11 batch, the rest of the cleanups. Please
pull, thanks.

The following changes since commit 6288d6eabc7505f42dda34a2c2962f91914be3a4:

  Btrfs: use the correct type when creating cow dio extent (2017-02-22 15:55:03 
-0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git 
for-chris-4.11-part2

for you to fetch changes up to 20a7db8ab3f2057a518448b1728d504ffadef65e:

  btrfs: add dummy callback for readpage_io_failed and drop checks (2017-02-28 
14:29:24 +0100)



Thanks Dave, I've got this along with Filipe's pull.

-chris

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3] btrfs: remove btrfs_err_str function from uapi/linux/btrfs.h

2017-02-28 Thread Dmitry V. Levin
btrfs_err_str function is not called from anywhere and is replicated
in the userspace headers for btrfs-progs.

Its removal also fixes the following linux/btrfs.h userspace
compilation error:

/usr/include/linux/btrfs.h: In function 'btrfs_err_str':
/usr/include/linux/btrfs.h:740:11: error: 'NULL' undeclared (first use in this 
function)
return NULL;

Suggested-by: Jeff Mahoney 
Signed-off-by: Dmitry V. Levin 
Reviewed-by: David Sterba 
---
v3: the patch seems to be lost, resending with updated list of addressees

v2: remove btrfs_err_str rather than fix it

 include/uapi/linux/btrfs.h | 27 ---
 1 file changed, 27 deletions(-)

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index db4c253..dcfc3a5 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -713,33 +713,6 @@ enum btrfs_err_code {
BTRFS_ERROR_DEV_ONLY_WRITABLE,
BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS
 };
-/* An error code to error string mapping for the kernel
-*  error codes
-*/
-static inline char *btrfs_err_str(enum btrfs_err_code err_code)
-{
-   switch (err_code) {
-   case BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET:
-   return "unable to go below two devices on raid1";
-   case BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET:
-   return "unable to go below four devices on raid10";
-   case BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET:
-   return "unable to go below two devices on raid5";
-   case BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET:
-   return "unable to go below three devices on raid6";
-   case BTRFS_ERROR_DEV_TGT_REPLACE:
-   return "unable to remove the dev_replace target dev";
-   case BTRFS_ERROR_DEV_MISSING_NOT_FOUND:
-   return "no missing devices found to remove";
-   case BTRFS_ERROR_DEV_ONLY_WRITABLE:
-   return "unable to remove the only writeable device";
-   case BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS:
-   return "add/delete/balance/replace/resize operation "\
-   "in progress";
-   default:
-   return NULL;
-   }
-}
 
 #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
   struct btrfs_ioctl_vol_args)
-- 
ldv
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] reflink: test adjacency of reflinked blocks

2017-02-28 Thread Darrick J. Wong
On Tue, Feb 28, 2017 at 04:15:02PM +0800, Eryu Guan wrote:
> On Fri, Feb 24, 2017 at 05:12:57PM -0800, Darrick J. Wong wrote:
> > From: Darrick J. Wong 
> > 
> > If we reflink a file with N blocks to another file one block at a time,
> > does the destination file end up with the same number of extents as the
> > source file?  In other words, does the filesystem succeed at combining
> > adjacent mappings into maximal extents?
> 
> I'm not sure if this is a standard behavior and applies to btrfs too?
> But btrfs is failing this test now:
> 
> +f1 (1) != f2 (32)
> +s1 (1) != s2 (32)
> 
> Fix test or btrfs? I'm taking it if btrfs is the one to be fixed :)

btrfs has that weird behavior where it doesn't merge the adjacent
extents at all (at least not according to FIEMAP) until you remount the
filesystem.  After the remount it's fine, but... WTF? :)

So yes, the test is working as designed.  btrfs needs fixing, or I guess
worst case we can _notrun it on btrfs.

Snark aside, it was intended originally to make sure that XFS is
properly merging the extent records together; then it occurred to me to
rewrite it with fiemap and make it one of the generic reflink tests so
that ocfs2 can get tested too.

--D

> 
> Thanks,
> Eryu
> 
> > 
> > Signed-off-by: Darrick J. Wong 
> > ---
> >  tests/generic/930 |  106 
> > +
> >  tests/generic/930.out |   11 +
> >  tests/generic/group   |1 
> >  3 files changed, 118 insertions(+)
> >  create mode 100755 tests/generic/930
> >  create mode 100644 tests/generic/930.out
> > 
> > 
> > diff --git a/tests/generic/930 b/tests/generic/930
> > new file mode 100755
> > index 000..15d8cbf
> > --- /dev/null
> > +++ b/tests/generic/930
> > @@ -0,0 +1,106 @@
> > +#! /bin/bash
> > +# FS QA Test No. 930
> > +#
> > +# Check that reflinking adjacent blocks in a file produces a single
> > +# block mapping extent.
> > +#
> > +#---
> > +# Copyright (c) 2017 Oracle, Inc.  All Rights Reserved.
> > +#
> > +# This program is free software; you can redistribute it and/or
> > +# modify it under the terms of the GNU General Public License as
> > +# published by the Free Software Foundation.
> > +#
> > +# This program is distributed in the hope that it would be useful,
> > +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > +# GNU General Public License for more details.
> > +#
> > +# You should have received a copy of the GNU General Public License
> > +# along with this program; if not, write the Free Software Foundation,
> > +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> > +#---
> > +#
> > +
> > +seq=`basename $0`
> > +seqres=$RESULT_DIR/$seq
> > +echo "QA output created by $seq"
> > +
> > +here=`pwd`
> > +tmp=/tmp/$$
> > +status=1   # failure is the default!
> > +trap "_cleanup; exit \$status" 0 1 2 3 7 15
> > +
> > +_cleanup()
> > +{
> > +   cd /
> > +   rm -rf $tmp.*
> > +   wait
> > +}
> > +
> > +# get standard environment, filters and checks
> > +. ./common/rc
> > +. ./common/filter
> > +. ./common/reflink
> > +
> > +# real QA test starts here
> > +_supported_os Linux
> > +_supported_fs generic
> > +_require_scratch_reflink
> > +_require_fiemap
> > +
> > +echo "Format and mount"
> > +_scratch_mkfs > $seqres.full 2>&1
> > +_scratch_mount >> $seqres.full 2>&1
> > +
> > +testdir=$SCRATCH_MNT/test-$seq
> > +mkdir $testdir
> > +
> > +blocks=32
> > +blksz=65536
> > +sz=$((blocks * blksz))
> > +
> > +echo "Create the original files"
> > +$XFS_IO_PROG -f -c "falloc 0 $sz" $testdir/file1 >> $seqres.full
> > +_pwrite_byte 0x61 0 $sz $testdir/file1 >> $seqres.full
> > +seq 0 $blksz $((sz - blksz)) | while read offset; do
> > +   _reflink_range $testdir/file1 $offset $testdir/file2 $offset $blksz >> 
> > $seqres.full
> > +done
> > +
> > +echo "Compare files"
> > +md5sum $testdir/file1 | _filter_scratch
> > +md5sum $testdir/file2 | _filter_scratch
> > +
> > +echo "Check extent counts"
> > +f1=$(_count_extents $testdir/file1)
> > +f2=$(_count_extents $testdir/file2)
> > +s1=$($XFS_IO_PROG -c 'fiemap -v' $testdir/file1 | awk '{print $5}' | grep 
> > -c '0x.*[2367aAbBfF]...$')
> > +s2=$($XFS_IO_PROG -c 'fiemap -v' $testdir/file2 | awk '{print $5}' | grep 
> > -c '0x.*[2367aAbBfF]...$')
> > +
> > +# Did the fs combine the extent mappings when we made f2?
> > +test $f1 -eq $f2 || echo "f1 ($f1) != f2 ($f2)"
> > +test $s1 -eq $s2 || echo "s1 ($s1) != s2 ($s2)"
> > +test $f1 -eq $s1 || echo "f1 ($f1) != s1 ($f1)"
> > +test $f2 -eq $s2 || echo "f2 ($f2) != s2 ($f2)"
> > +
> > +_scratch_cycle_mount
> > +
> > +echo "Compare files after remounting"
> > +md5sum $testdir/file1 | _filter_scratch
> > +md5sum $testdir/file2 | _filter_scratch
> > +
> > +echo "Check extent counts"
> > +f1=$(_count_extents $testdir/file1)
> > 

[PULL] Btrfs cleanups for 4.11, part 2

2017-02-28 Thread David Sterba
Hi,

this is the second half of the 4.11 batch, the rest of the cleanups. Please
pull, thanks.

The following changes since commit 6288d6eabc7505f42dda34a2c2962f91914be3a4:

  Btrfs: use the correct type when creating cow dio extent (2017-02-22 15:55:03 
-0800)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git 
for-chris-4.11-part2

for you to fetch changes up to 20a7db8ab3f2057a518448b1728d504ffadef65e:

  btrfs: add dummy callback for readpage_io_failed and drop checks (2017-02-28 
14:29:24 +0100)


David Sterba (16):
  btrfs: constify device path passed to relevant helpers
  btrfs: constify input buffer of btrfs_csum_data
  btrfs: constify buffers used by compression helpers
  btrfs: constify name of subvolume in creation helpers
  btrfs: merge length input and output parameter in compress_pages
  btrfs: merge nr_pages input and output parameter in compress_pages
  btrfs: export compression buffer limits in a header
  btrfs: use predefined limits for calculating maximum number of pages for 
compression
  btrfs: derive maximum output size in the compression implementation
  btrfs: remove BUG_ON from __tree_mod_log_insert
  btrfs: handle allocation error in update_dev_stat_item
  btrfs: do proper error handling in btrfs_insert_xattr_item
  btrfs: let writepage_end_io_hook return void
  btrfs: document existence of extent_io ops callbacks
  btrfs: drop checks for mandatory extent_io_ops callbacks
  btrfs: add dummy callback for readpage_io_failed and drop checks

Nikolay Borisov (42):
  btrfs: Make btrfs_log_all_parents take btrfs_inode
  btrfs: Make btrfs_insert_dir_item take btrfs_inode
  btrfs: make btrfs_set_inode_index_count take btrfs_inode
  btrfs: Make btrfs_set_inode_index take btrfs_inode
  btrfs: Make btrfs_i_size_write take btrfs_inode
  btrfs: make btrfs_is_free_space_inode take btrfs_inode
  btrfs: make btrfs_alloc_data_chunk_ondemand take btrfs_inode
  btrfs: Make drop_outstanding_extent take btrfs_inode
  btrfs: Make calc_csum_metadata_size take btrfs_inode
  btrfs: Make btrfs_orphan_reserve_metadata take btrfs_inode
  btrfs: Make btrfs_orphan_release_metadata take btrfs_inode
  btrfs: Make btrfs_delalloc_reserve_metadata take btrfs_inode
  btrfs: all btrfs_delalloc_release_metadata take btrfs_inode
  btrfs: Make (__)btrfs_add_inode_defrag take btrfs_inode
  btrfs: Make btrfs_requeue_inode_defrag take btrfs_inode
  btrfs: Make btrfs_drop_extent_cache take btrfs_inode
  btrfs: Make hole_mergeable take btrfs_inode
  btrfs: Make fill_holes take btrfs_inode
  btrfs: Make btrfs_mark_extent_written take btrfs_inode
  btrfs: Make btrfs_lookup_ordered_range take btrfs_inode
  btrfs: Make check_can_nocow take btrfs_inode
  btrfs: Make lock_and_cleanup_extent_if_need take btrfs_inode
  btrfs: make free_io_failure take btrfs_inode
  btrfs: make btrfs_print_data_csum_error take btrfs_inode
  btrfs: make check_compressed_csum take btrfs_inode
  btrfs: make repair_io_failure take btrfs_inode
  btrfs: make clean_io_failure take btrfs_inode
  btrfs: make btrfs_free_io_failure_record take btrfs_inode
  btrfs: make btrfs_orphan_del take btrfs_inode
  btrfs: Make btrfs_orphan_add take btrfs_inode
  btrfs: Make check_parent_dirs_for_sync take btrfs_inode
  btrfs: make btrfs_log_inode_parent take btrfs_inode
  btrfs: Make btrfs_extent_item_to_extent_map take btrfs_inode
  btrfs: Make btrfs_clear_bit_hook take btrfs_inode
  btrfs: Make clone_update_extent_map take btrfs_inode
  btrfs: Make check_extent_to_block take btrfs_inode
  btrfs: Make get_extent_t take btrfs_inode
  btrfs: Make btrfs_del_delalloc_inode take btrfs_inode
  btrfs: Make btrfs_add_link take btrfs_inode
  btrfs: Make btrfs_add_nondir take btrfs_inode
  btrfs: make btrfs_inode_block_unlocked_dio take btrfs_inode
  btrfs: make btrfs_inode_resume_unlocked_dio take btrfs_inode

 fs/btrfs/btrfs_inode.h   |  31 ++--
 fs/btrfs/compression.c   |  43 ++---
 fs/btrfs/compression.h   |  30 ++--
 fs/btrfs/ctree.c |   2 -
 fs/btrfs/ctree.h |  39 ++--
 fs/btrfs/delayed-inode.c |   2 +-
 fs/btrfs/dev-replace.c   |   5 +-
 fs/btrfs/dev-replace.h   |   5 +-
 fs/btrfs/dir-item.c  |   9 +-
 fs/btrfs/disk-io.c   |  17 +-
 fs/btrfs/disk-io.h   |   2 +-
 fs/btrfs/extent-tree.c   | 135 +++---
 fs/btrfs/extent_io.c |  75 
 fs/btrfs/extent_io.h |  50 --
 fs/btrfs/file-item.c |  12 +-
 fs/btrfs/file.c  | 139 ---
 fs/btrfs/free-space-cache.c  |   5 +-
 fs/btrfs/inode-map.c |   2 +-
 fs/btrfs/inode.c | 416 ++-
 fs/btrfs/ioctl.

Re: [PATCH 3/3] btrfs: do proper error handling in btrfs_insert_xattr_item

2017-02-28 Thread David Sterba
On Tue, Feb 21, 2017 at 09:39:05PM -0800, Liu Bo wrote:
> On Mon, Feb 20, 2017 at 07:25:06PM +0100, David Sterba wrote:
> > The space check in btrfs_insert_xattr_item is duplicated in its caller
> > (do_setxattr) so we won't hit the BUG_ON. Continuing without any check
> > could be disastrous so turn it into proper error handling.
> > 
> > Signed-off-by: David Sterba 
> > ---
> >  fs/btrfs/dir-item.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
> > index 724504a2d7ac..640801082533 100644
> > --- a/fs/btrfs/dir-item.c
> > +++ b/fs/btrfs/dir-item.c
> > @@ -80,7 +80,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle 
> > *trans,
> > struct extent_buffer *leaf;
> > u32 data_size;
> >  
> > -   BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info));
> > +   if (name_len + data_len > BTRFS_MAX_XATTR_SIZE(root->fs_info))
> > +   return -ENOSPC;
> >
> 
> Besides making it silent, how about adding an ASSERT to cry out?
> (Although currently we'd never come into this case.)

I don't think we need the assert, the caller is supposed to handle the
error. In this case it's validation of input parameters, that could
possibly happen as the function is not static.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/4] reflink: test adjacency of reflinked blocks

2017-02-28 Thread Eryu Guan
On Fri, Feb 24, 2017 at 05:12:57PM -0800, Darrick J. Wong wrote:
> From: Darrick J. Wong 
> 
> If we reflink a file with N blocks to another file one block at a time,
> does the destination file end up with the same number of extents as the
> source file?  In other words, does the filesystem succeed at combining
> adjacent mappings into maximal extents?

I'm not sure if this is a standard behavior and applies to btrfs too?
But btrfs is failing this test now:

+f1 (1) != f2 (32)
+s1 (1) != s2 (32)

Fix test or btrfs? I'm taking it if btrfs is the one to be fixed :)

Thanks,
Eryu

> 
> Signed-off-by: Darrick J. Wong 
> ---
>  tests/generic/930 |  106 
> +
>  tests/generic/930.out |   11 +
>  tests/generic/group   |1 
>  3 files changed, 118 insertions(+)
>  create mode 100755 tests/generic/930
>  create mode 100644 tests/generic/930.out
> 
> 
> diff --git a/tests/generic/930 b/tests/generic/930
> new file mode 100755
> index 000..15d8cbf
> --- /dev/null
> +++ b/tests/generic/930
> @@ -0,0 +1,106 @@
> +#! /bin/bash
> +# FS QA Test No. 930
> +#
> +# Check that reflinking adjacent blocks in a file produces a single
> +# block mapping extent.
> +#
> +#---
> +# Copyright (c) 2017 Oracle, Inc.  All Rights Reserved.
> +#
> +# This program is free software; you can redistribute it and/or
> +# modify it under the terms of the GNU General Public License as
> +# published by the Free Software Foundation.
> +#
> +# This program is distributed in the hope that it would be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with this program; if not, write the Free Software Foundation,
> +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
> +#---
> +#
> +
> +seq=`basename $0`
> +seqres=$RESULT_DIR/$seq
> +echo "QA output created by $seq"
> +
> +here=`pwd`
> +tmp=/tmp/$$
> +status=1 # failure is the default!
> +trap "_cleanup; exit \$status" 0 1 2 3 7 15
> +
> +_cleanup()
> +{
> + cd /
> + rm -rf $tmp.*
> + wait
> +}
> +
> +# get standard environment, filters and checks
> +. ./common/rc
> +. ./common/filter
> +. ./common/reflink
> +
> +# real QA test starts here
> +_supported_os Linux
> +_supported_fs generic
> +_require_scratch_reflink
> +_require_fiemap
> +
> +echo "Format and mount"
> +_scratch_mkfs > $seqres.full 2>&1
> +_scratch_mount >> $seqres.full 2>&1
> +
> +testdir=$SCRATCH_MNT/test-$seq
> +mkdir $testdir
> +
> +blocks=32
> +blksz=65536
> +sz=$((blocks * blksz))
> +
> +echo "Create the original files"
> +$XFS_IO_PROG -f -c "falloc 0 $sz" $testdir/file1 >> $seqres.full
> +_pwrite_byte 0x61 0 $sz $testdir/file1 >> $seqres.full
> +seq 0 $blksz $((sz - blksz)) | while read offset; do
> + _reflink_range $testdir/file1 $offset $testdir/file2 $offset $blksz >> 
> $seqres.full
> +done
> +
> +echo "Compare files"
> +md5sum $testdir/file1 | _filter_scratch
> +md5sum $testdir/file2 | _filter_scratch
> +
> +echo "Check extent counts"
> +f1=$(_count_extents $testdir/file1)
> +f2=$(_count_extents $testdir/file2)
> +s1=$($XFS_IO_PROG -c 'fiemap -v' $testdir/file1 | awk '{print $5}' | grep -c 
> '0x.*[2367aAbBfF]...$')
> +s2=$($XFS_IO_PROG -c 'fiemap -v' $testdir/file2 | awk '{print $5}' | grep -c 
> '0x.*[2367aAbBfF]...$')
> +
> +# Did the fs combine the extent mappings when we made f2?
> +test $f1 -eq $f2 || echo "f1 ($f1) != f2 ($f2)"
> +test $s1 -eq $s2 || echo "s1 ($s1) != s2 ($s2)"
> +test $f1 -eq $s1 || echo "f1 ($f1) != s1 ($f1)"
> +test $f2 -eq $s2 || echo "f2 ($f2) != s2 ($f2)"
> +
> +_scratch_cycle_mount
> +
> +echo "Compare files after remounting"
> +md5sum $testdir/file1 | _filter_scratch
> +md5sum $testdir/file2 | _filter_scratch
> +
> +echo "Check extent counts"
> +f1=$(_count_extents $testdir/file1)
> +f2=$(_count_extents $testdir/file2)
> +s1=$($XFS_IO_PROG -c 'fiemap -v' $testdir/file1 | awk '{print $5}' | grep -c 
> '0x.*[2367aAbBfF]...$')
> +s2=$($XFS_IO_PROG -c 'fiemap -v' $testdir/file2 | awk '{print $5}' | grep -c 
> '0x.*[2367aAbBfF]...$')
> +
> +# Are the mappings still combined?
> +test $f1 -eq $f2 || echo "f1 ($f1) != f2 ($f2)"
> +test $s1 -eq $s2 || echo "s1 ($s1) != s2 ($s2)"
> +test $f1 -eq $s1 || echo "f1 ($f1) != s1 ($f1)"
> +test $f2 -eq $s2 || echo "f2 ($f2) != s2 ($f2)"
> +
> +# success, all done
> +status=0
> +exit
> diff --git a/tests/generic/930.out b/tests/generic/930.out
> new file mode 100644
> index 000..556108a
> --- /dev/null
> +++ b/tests/generic/930.out
> @@ -0,0 +1,11 @@
> +QA output created by 930
> +Format and mount
> +Create the original files
> +Compare files
> +de89461b64701958984c95d1bfb0065a  SCRATCH_MNT/test-930/fi