Re: [PATCH] Btrfs: set the free space control unit properly

2013-05-28 Thread Miao Xie
On  tue, 28 May 2013 14:50:25 -0400, Josef Bacik wrote:
> Stefan pointed out that xfstests generic/013 was failing because the free 
> space
> cache checker was complaining with leafsize of 16k.  Turns out this is because
> we were unconditionally using root->sectorsize as the free space ctl unit in 
> the
> kernel, which doesn't work out if leafsize != sectorsize.  This caused the in
> memory free space cache to get screwed up which translated to a wrong space
> cache on disk.  This patch fixes the problem by not carrying the sectorsize in
> the block group since we have the ctl->unit, and we set the ctl->unit 
> according
> to the type of block group we are.  This made generic/013 pass with 16k
> leafsize, whereas before it failed every single time.  Thanks,

But this patch will cause old filesystems to be corrupted, because one bit in
them equals one sector (4K), not 16K.

Thanks
Miao

> 
> Cc: sta...@vger.kernel.org
> Reported-by: Stefan Behrens 
> Signed-off-by: Josef Bacik 
> ---
>  fs/btrfs/ctree.h|1 -
>  fs/btrfs/extent-tree.c  |7 ++-
>  fs/btrfs/free-space-cache.c |   32 +---
>  fs/btrfs/free-space-cache.h |3 ++-
>  4 files changed, 29 insertions(+), 14 deletions(-)
> 
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index fd62aa8..3442976 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -1206,7 +1206,6 @@ struct btrfs_block_group_cache {
>   u64 reserved;
>   u64 bytes_super;
>   u64 flags;
> - u64 sectorsize;
>   u64 cache_generation;
>  
>   /* for raid56, this is a full stripe, without parity */
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index 4ec8305..f7af6a0 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -8128,11 +8128,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
>   key.objectid = found_key.objectid + found_key.offset;
>   btrfs_release_path(path);
>   cache->flags = btrfs_block_group_flags(&cache->item);
> - cache->sectorsize = root->sectorsize;
>   cache->full_stripe_len = btrfs_full_stripe_len(root,
>  &root->fs_info->mapping_tree,
>  found_key.objectid);
> - btrfs_init_free_space_ctl(cache);
> + btrfs_init_free_space_ctl(cache, root);
>  
>   /*
>* We need to exclude the super stripes now so that the space
> @@ -8283,7 +8282,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle 
> *trans,
>   cache->key.objectid = chunk_offset;
>   cache->key.offset = size;
>   cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
> - cache->sectorsize = root->sectorsize;
>   cache->fs_info = root->fs_info;
>   cache->full_stripe_len = btrfs_full_stripe_len(root,
>  &root->fs_info->mapping_tree,
> @@ -8295,12 +8293,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle 
> *trans,
>   INIT_LIST_HEAD(&cache->cluster_list);
>   INIT_LIST_HEAD(&cache->new_bg_list);
>  
> - btrfs_init_free_space_ctl(cache);
> -
>   btrfs_set_block_group_used(&cache->item, bytes_used);
>   btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
>   cache->flags = type;
>   btrfs_set_block_group_flags(&cache->item, type);
> + btrfs_init_free_space_ctl(cache, root);
>  
>   cache->last_byte_to_unpin = (u64)-1;
>   cache->cached = BTRFS_CACHE_FINISHED;
> diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
> index 7517285..ec43e422 100644
> --- a/fs/btrfs/free-space-cache.c
> +++ b/fs/btrfs/free-space-cache.c
> @@ -1654,7 +1654,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
>* of cache left then go ahead an dadd them, no sense in adding
>* the overhead of a bitmap if we don't have to.
>*/
> - if (info->bytes <= block_group->sectorsize * 4) {
> + if (info->bytes <= ctl->unit * 4) {
>   if (ctl->free_extents * 2 <= ctl->extents_thresh)
>   return false;
>   } else {
> @@ -2001,12 +2001,19 @@ void btrfs_dump_free_space(struct 
> btrfs_block_group_cache *block_group,
>  "\n", count);
>  }
>  
> -void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
> +void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group,
> +struct btrfs_root *root)
>  {
>   struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
>  
>   spin_lock_init(&ctl->tree_lock);
> - ctl->unit = block_group->sectorsize;
> +
> + /* This works for mixed block groups too since sectorsize == leafsize */
> + if (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA |
> +   BTRFS_BLOCK_GROUP_SYSTEM))
> +   

Re: nocow 'C' flag ignored after balance

2013-05-28 Thread Liu Bo
On Tue, May 28, 2013 at 09:22:11AM -0500, Kyle Gates wrote:
> >From: Liu Bo 
> >
> >Subject: [PATCH] Btrfs: fix broken nocow after a normal balance
> >
>[...]
> 
> Sorry for the long wait in replying.
> This patch was unsuccessful in fixing the problem (on my 3.8 Ubuntu
> Raring kernel). I can probably try again on a newer version if you
> think it will help.
> This was my first kernel compile so I patched by hand and waited (10
> hours on my old 32 bit single core machine).
> 
> I did move some of the files off and back on to the filesystem to
> start fresh and compare but all seem to exhibit the same behavior
> after a balance.
>

Thanks for testing the patch although it didn't help you.
Actually I tested it to be sure that it fixed the problems in my reproducer.

So anyway can you please apply this debug patch in order to nail it down?

thanks,
liubo

 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index df472ab..c12a11c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2857,8 +2857,12 @@ static noinline int check_committed_ref(struct 
btrfs_trans_handle *trans,
goto out;
 
if (btrfs_extent_generation(leaf, ei) <=
-   btrfs_root_last_snapshot(&root->root_item))
+   btrfs_root_last_snapshot(&root->root_item)) {
+   printk("extent gen %llu last_snap %llu\n",
+   btrfs_extent_generation(leaf, ei),
+   btrfs_root_last_snapshot(&root->root_item));
goto out;
+   }
 
iref = (struct btrfs_extent_inline_ref *)(ei + 1);
if (btrfs_extent_inline_ref_type(leaf, iref) !=
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 23c596c..8cad6ee 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1317,16 +1317,24 @@ next_slot:
goto out_check;
if (btrfs_file_extent_compression(leaf, fi) ||
btrfs_file_extent_encryption(leaf, fi) ||
-   btrfs_file_extent_other_encoding(leaf, fi))
+   btrfs_file_extent_other_encoding(leaf, fi)) {
+   printk("special encoding\n");
goto out_check;
-   if (extent_type == BTRFS_FILE_EXTENT_REG && !force)
+   }
+   if (extent_type == BTRFS_FILE_EXTENT_REG && !force) {
+   printk("BTRFS_FILE_EXTENT_REF\n");
goto out_check;
-   if (btrfs_extent_readonly(root, disk_bytenr))
+   }
+   if (btrfs_extent_readonly(root, disk_bytenr)) {
+   printk("ro\n");
goto out_check;
+   }
if (btrfs_cross_ref_exist(trans, root, ino,
  found_key.offset -
- extent_offset, disk_bytenr))
+ extent_offset, disk_bytenr)) {
+   printk("cross ref\n");
goto out_check;
+   }
disk_bytenr += extent_offset;
disk_bytenr += cur_offset - found_key.offset;
num_bytes = min(end + 1, extent_end) - cur_offset;

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v1 0/5] BTRFS hot relocation support

2013-05-28 Thread Kent Overstreet
On Tue, May 21, 2013 at 02:22:34AM +, Duncan wrote:
> zwu.kernel posted on Mon, 20 May 2013 23:11:22 +0800 as excerpted:
> 
> > The patchset is trying to introduce hot relocation support
> > for BTRFS. In hybrid storage environment, when the data in rotating disk
> > get hot, it can be relocated to nonrotating disk by BTRFS hot relocation
> > support automatically; also, if nonrotating disk ratio exceed its upper
> > threshold, the data which get cold can be looked up and relocated to
> > rotating disk to make more space in nonrotating disk at first, and then
> > the data which get hot will be relocated to nonrotating disk
> > automatically.
> 
> One advantage of a filesystem implementation, as opposed to bcache or 
> dmcache, is arguably a corner-case, but it's /my/ corner-case, so...
> 
> I run an intr*-less (I guess technically, empty initramfs) monolithic-
> kernel boot, using the kernel commandline root= and (formerly) md= and 
> related logic to choose/assemble/mount root directly from the kernel 
> command line via bootloader (grub2).  Thus, any user-space-required-to-
> mount-root is out, since I don't have an initr* and thus no early 
> userspace.  That means both lvm2 and dmcache (AFAIK) are out.  I'm not 
> sure about bcache, but it has other negatives, particularly against btrfs-
> raid-1 and I'd guess md/raid-1 as well.
> 
> Much like md before it, btrfs, while normally requiring the user-space-
> required device-scan to properly handle multiple devices, has kernel-
> command-line options that allow direct kernel multi-device assembly 
> without the help of early-userspace/initr*.

I wouldn't be averse to adding such functionality to bcache, provided it
could be done reasonably cleanly/sensibly. It's not high on my list but
I'd accept patches :)
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: set the free space control unit properly

2013-05-28 Thread Josef Bacik
Stefan pointed out that xfstests generic/013 was failing because the free space
cache checker was complaining with leafsize of 16k.  Turns out this is because
we were unconditionally using root->sectorsize as the free space ctl unit in the
kernel, which doesn't work out if leafsize != sectorsize.  This caused the in
memory free space cache to get screwed up which translated to a wrong space
cache on disk.  This patch fixes the problem by not carrying the sectorsize in
the block group since we have the ctl->unit, and we set the ctl->unit according
to the type of block group we are.  This made generic/013 pass with 16k
leafsize, whereas before it failed every single time.  Thanks,

Cc: sta...@vger.kernel.org
Reported-by: Stefan Behrens 
Signed-off-by: Josef Bacik 
---
 fs/btrfs/ctree.h|1 -
 fs/btrfs/extent-tree.c  |7 ++-
 fs/btrfs/free-space-cache.c |   32 +---
 fs/btrfs/free-space-cache.h |3 ++-
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fd62aa8..3442976 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1206,7 +1206,6 @@ struct btrfs_block_group_cache {
u64 reserved;
u64 bytes_super;
u64 flags;
-   u64 sectorsize;
u64 cache_generation;
 
/* for raid56, this is a full stripe, without parity */
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4ec8305..f7af6a0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8128,11 +8128,10 @@ int btrfs_read_block_groups(struct btrfs_root *root)
key.objectid = found_key.objectid + found_key.offset;
btrfs_release_path(path);
cache->flags = btrfs_block_group_flags(&cache->item);
-   cache->sectorsize = root->sectorsize;
cache->full_stripe_len = btrfs_full_stripe_len(root,
   &root->fs_info->mapping_tree,
   found_key.objectid);
-   btrfs_init_free_space_ctl(cache);
+   btrfs_init_free_space_ctl(cache, root);
 
/*
 * We need to exclude the super stripes now so that the space
@@ -8283,7 +8282,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle 
*trans,
cache->key.objectid = chunk_offset;
cache->key.offset = size;
cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
-   cache->sectorsize = root->sectorsize;
cache->fs_info = root->fs_info;
cache->full_stripe_len = btrfs_full_stripe_len(root,
   &root->fs_info->mapping_tree,
@@ -8295,12 +8293,11 @@ int btrfs_make_block_group(struct btrfs_trans_handle 
*trans,
INIT_LIST_HEAD(&cache->cluster_list);
INIT_LIST_HEAD(&cache->new_bg_list);
 
-   btrfs_init_free_space_ctl(cache);
-
btrfs_set_block_group_used(&cache->item, bytes_used);
btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
cache->flags = type;
btrfs_set_block_group_flags(&cache->item, type);
+   btrfs_init_free_space_ctl(cache, root);
 
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 7517285..ec43e422 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1654,7 +1654,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
 * of cache left then go ahead an dadd them, no sense in adding
 * the overhead of a bitmap if we don't have to.
 */
-   if (info->bytes <= block_group->sectorsize * 4) {
+   if (info->bytes <= ctl->unit * 4) {
if (ctl->free_extents * 2 <= ctl->extents_thresh)
return false;
} else {
@@ -2001,12 +2001,19 @@ void btrfs_dump_free_space(struct 
btrfs_block_group_cache *block_group,
   "\n", count);
 }
 
-void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
+void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group,
+  struct btrfs_root *root)
 {
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
 
spin_lock_init(&ctl->tree_lock);
-   ctl->unit = block_group->sectorsize;
+
+   /* This works for mixed block groups too since sectorsize == leafsize */
+   if (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA |
+ BTRFS_BLOCK_GROUP_SYSTEM))
+   ctl->unit = root->leafsize;
+   else
+   ctl->unit = root->sectorsize;
ctl->start = block_group->key.objectid;
ctl->private = block_group;
ctl->op = &free_space_op;
@@ -2548,10 +2555,10 @@ int btrfs_find_space_cluster(struct btrfs_trans_handle 
*trans,
  

Re: [PATCH 0/3] Btrfs: qgroup rescan fixes for next rc

2013-05-28 Thread Jan Schmidt
Hi Wang,

Please have a look at these patches, you should have been CCed but I just
realized git send-email doesn't care about Cc lines in the patch file. Sigh.

Thanks,
-Jan
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/3] Btrfs: avoid double free of fs_info->qgroup_ulist

2013-05-28 Thread Jan Schmidt
When btrfs_read_qgroup_config or btrfs_quota_enable return non-zero, we've
already freed the fs_info->qgroup_ulist. The final btrfs_free_qgroup_config
called from quota_disable makes another ulist_free(fs_info->qgroup_ulist)
call.

We set fs_info->qgroup_ulist to NULL on the mentioned error paths, turning
the ulist_free in btrfs_free_qgroup_config into a noop.

Cc: Wang Shilong 
Signed-off-by: Jan Schmidt 
---
 fs/btrfs/qgroup.c |8 ++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 74b432d..c6ce642 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -430,8 +430,10 @@ out:
}
btrfs_free_path(path);
 
-   if (ret < 0)
+   if (ret < 0) {
ulist_free(fs_info->qgroup_ulist);
+   fs_info->qgroup_ulist = NULL;
+   }
 
return ret < 0 ? ret : 0;
 }
@@ -932,8 +934,10 @@ out_free_root:
kfree(quota_root);
}
 out:
-   if (ret)
+   if (ret) {
ulist_free(fs_info->qgroup_ulist);
+   fs_info->qgroup_ulist = NULL;
+   }
mutex_unlock(&fs_info->qgroup_ioctl_lock);
return ret;
 }
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] Btrfs: fix memory patcher through fs_info->qgroup_ulist

2013-05-28 Thread Jan Schmidt
Commit 5b7c665e introduced fs_info->qgroup_ulist, that is allocated during
btrfs_read_qgroup_config and meant to be used later by the qgroup accounting
code. However, it is always freed before btrfs_read_qgroup_config returns,
because the commit mentioned above adds a check for (ret), where a check
for (ret < 0) would have been the right choice. This commit fixes the check.

Cc: Wang Shilong 
Signed-off-by: Jan Schmidt 
---
 fs/btrfs/qgroup.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index d059d86..74b432d 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -430,7 +430,7 @@ out:
}
btrfs_free_path(path);
 
-   if (ret)
+   if (ret < 0)
ulist_free(fs_info->qgroup_ulist);
 
return ret < 0 ? ret : 0;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] Btrfs: fix qgroup rescan resume on mount

2013-05-28 Thread Jan Schmidt
When called during mount, we cannot start the rescan worker thread until
open_ctree is done. This commit restructures the qgroup rescan internals to
enable a clean deferral of the rescan resume operation.

First of all, the struct qgroup_rescan is removed, saving us a malloc and
some initialization synchronization problems. Its only element (the worker
struct) now lives within fs_info just as the rest of the rescan code.

Then setting up a rescan worker is split into several reusable stages.
Currently we have three different rescan startup scenarios:
(A) rescan ioctl
(B) rescan resume by mount
(C) rescan by quota enable

Each case needs its own combination of the four following steps:
(1) set the progress [A, C: zero; B: state of umount]
(2) commit the transaction [A]
(3) set the counters [A, C: zero; B: state of umount]
(4) start worker [A, B, C]

qgroup_rescan_init does step (1). There's no extra function added to commit
a transaction, we've got that already. qgroup_rescan_zero_tracking does
step (3). Step (4) is nothing more than a call to the generic
btrfs_queue_worker.

We also get rid of a double check for the rescan progress during
btrfs_qgroup_account_ref, which is no longer required due to having step 2
from the list above.

As a side effect, this commit prepares to move the rescan start code from
btrfs_run_qgroups (which is run during commit) to a less time critical
section.

Signed-off-by: Jan Schmidt 
---
 fs/btrfs/ctree.h   |2 +
 fs/btrfs/disk-io.c |2 +
 fs/btrfs/qgroup.c  |  190 +---
 3 files changed, 125 insertions(+), 69 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fd62aa8..8ac8d52 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1610,6 +1610,7 @@ struct btrfs_fs_info {
struct btrfs_key qgroup_rescan_progress;
struct btrfs_workers qgroup_rescan_workers;
struct completion qgroup_rescan_completion;
+   struct btrfs_work qgroup_rescan_work;
 
/* filesystem state */
unsigned long fs_state;
@@ -3856,6 +3857,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
 int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
+void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
 int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
 int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
  struct btrfs_fs_info *fs_info, u64 src, u64 dst);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d7b46c6..da4a10c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2879,6 +2879,8 @@ retry_root_backup:
return ret;
}
 
+   btrfs_qgroup_rescan_resume(fs_info);
+
return 0;
 
 fail_qgroup:
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index c6ce642..1280eff 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -98,13 +98,10 @@ struct btrfs_qgroup_list {
struct btrfs_qgroup *member;
 };
 
-struct qgroup_rescan {
-   struct btrfs_work   work;
-   struct btrfs_fs_info*fs_info;
-};
-
-static void qgroup_rescan_start(struct btrfs_fs_info *fs_info,
-   struct qgroup_rescan *qscan);
+static int
+qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
+  int init_flags);
+static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
 
 /* must be called with qgroup_ioctl_lock held */
 static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
@@ -255,6 +252,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
int slot;
int ret = 0;
u64 flags = 0;
+   u64 rescan_progress = 0;
 
if (!fs_info->quota_enabled)
return 0;
@@ -312,20 +310,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
}
fs_info->qgroup_flags = btrfs_qgroup_status_flags(l,
  ptr);
-   fs_info->qgroup_rescan_progress.objectid =
-   btrfs_qgroup_status_rescan(l, ptr);
-   if (fs_info->qgroup_flags &
-   BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
-   struct qgroup_rescan *qscan =
-   kmalloc(sizeof(*qscan), GFP_NOFS);
-   if (!qscan) {
-   ret = -ENOMEM;
-   goto out;
-   }
-   fs_info->qgroup_rescan_progress.type = 0;
-   fs_info->qgroup_rescan_progress.offset = 0;
-   qgroup_

[PATCH 0/3] Btrfs: qgroup rescan fixes for next rc

2013-05-28 Thread Jan Schmidt
Here are three fixes for the new qgroup rescan feature. The first two
are quite small, the third one is a little bigger. I thought about
splitting that one up, but in the end I didn't find a good point to
break that up. It achieves more than one goal, I agree, but it's more or
less a compact code change that need not be split artificially in my
opinion.

Jan Schmidt (3):
  Btrfs: fix memory patcher through fs_info->qgroup_ulist
  Btrfs: avoid double free of fs_info->qgroup_ulist
  Btrfs: fix qgroup rescan resume on mount

 fs/btrfs/ctree.h   |2 +
 fs/btrfs/disk-io.c |2 +
 fs/btrfs/qgroup.c  |  198 +---
 3 files changed, 131 insertions(+), 71 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: nocow 'C' flag ignored after balance

2013-05-28 Thread Kyle Gates

From: Liu Bo 

Subject: [PATCH] Btrfs: fix broken nocow after a normal balance

Balance will create reloc_root for each fs root, and it's going to
record last_snapshot to filter shared blocks.  The side effect of
setting last_snapshot is to break nocow attributes of files.

So here we update file extent's generation while walking relocated
file extents in data reloc root, and use file extent's generation
instead for checking if we have cross refs for the file extent.

That way we can make nocow happy again and have no impact on others.

Reported-by: Kyle Gates 
Signed-off-by: Liu Bo 
---
fs/btrfs/ctree.h   |2 +-
fs/btrfs/extent-tree.c |   18 +-
fs/btrfs/inode.c   |   10 --
fs/btrfs/relocation.c  |1 +
4 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4560052..eb2e782 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3090,7 +3090,7 @@ int btrfs_pin_extent_for_log_replay(struct 
btrfs_root *root,

 u64 bytenr, u64 num_bytes);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
   struct btrfs_root *root,
-   u64 objectid, u64 offset, u64 bytenr);
+   u64 objectid, u64 offset, u64 bytenr, u64 gen);
struct btrfs_block_group_cache *btrfs_lookup_block_group(
 struct btrfs_fs_info *info,
 u64 bytenr);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1e84c74..f3b3616 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2816,7 +2816,8 @@ out:
static noinline int check_committed_ref(struct btrfs_trans_handle *trans,
 struct btrfs_root *root,
 struct btrfs_path *path,
- u64 objectid, u64 offset, u64 bytenr)
+ u64 objectid, u64 offset, u64 bytenr,
+ u64 fi_gen)
{
 struct btrfs_root *extent_root = root->fs_info->extent_root;
 struct extent_buffer *leaf;
@@ -2861,8 +2862,15 @@ static noinline int check_committed_ref(struct 
btrfs_trans_handle

*trans,
 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY))
 goto out;

- if (btrfs_extent_generation(leaf, ei) <=
- btrfs_root_last_snapshot(&root->root_item))
+ /*
+ * Usually generation in extent item is larger than that in file extent
+ * item because of delay refs.  But we don't want balance to break
+ * file's nocow behaviour, so use file_extent's generation which has
+ * been updates when we update fs root to point to relocated file
+ * extents in data reloc root.
+ */
+ fi_gen = max_t(u64, btrfs_extent_generation(leaf, ei), fi_gen);
+ if (fi_gen <= btrfs_root_last_snapshot(&root->root_item))
 goto out;

 iref = (struct btrfs_extent_inline_ref *)(ei + 1);
@@ -2886,7 +2894,7 @@ out:

int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
   struct btrfs_root *root,
-   u64 objectid, u64 offset, u64 bytenr)
+   u64 objectid, u64 offset, u64 bytenr, u64 gen)
{
 struct btrfs_path *path;
 int ret;
@@ -2898,7 +2906,7 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle 
*trans,


 do {
 ret = check_committed_ref(trans, root, path, objectid,
-   offset, bytenr);
+   offset, bytenr, gen);
 if (ret && ret != -ENOENT)
 goto out;

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2cfdd33..976b045 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1727,6 +1727,8 @@ next_slot:
 ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
 if (extent_type == BTRFS_FILE_EXTENT_REG ||
 extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ u64 gen;
+ gen = btrfs_file_extent_generation(leaf, fi);
 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
 extent_offset = btrfs_file_extent_offset(leaf, fi);
 extent_end = found_key.offset +
@@ -1749,7 +1751,8 @@ next_slot:
 goto out_check;
 if (btrfs_cross_ref_exist(trans, root, ino,
   found_key.offset -
-   extent_offset, disk_bytenr))
+   extent_offset, disk_bytenr,
+   gen))
 goto out_check;
 disk_bytenr += extent_offset;
 disk_bytenr += cur_offset - found_key.offset;
@@ -7002,6 +7005,7 @@ static noinline int can_nocow_odirect(struct 
btrfs_trans_handle

*trans,
 struct btrfs_key key;
 u64 disk_bytenr;
 u64 backref_offset;
+ u64 fi_gen;
 u64 extent_end;
 u64 num_bytes;
 int slot;
@@ -7048,6 +7052,7 @@ static noinline int can_nocow_odirect(struct 
btrfs_trans_handle

*trans,
 }
 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
 backref_offset = btrfs_file_extent_offset(leaf, fi);
+ fi_gen = btrfs_file_extent_generation(leaf, fi);

 *orig_start = key.offset - backref_offset;
 *orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
@@ -7067,7 +7072,8 @@ static noinline int can_nocow_odirect(struct 
btrfs_trans_handle

*trans,
 * find any we must cow
 */
 if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
-   key.offset - backref_offset, disk_bytenr))
+   key.offset - backref_offset, disk_bytenr,
+   fi_gen))
 goto out;

 /*
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 704a1b8..07faabf 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1637,6 +1637,7 @@ int replace_file_extents(struct btrfs_trans_handle 
*trans,

 BUG_ON(ret < 0

[no subject]

2013-05-28 Thread Alex Lyakas
Hello all,
I have the following unresponsive btrfs:

btrfs_end_transaction() is called and is stuck in btrfs_tree_lock():

May 27 16:13:55 vc kernel: [ 7130.421159] kworker/u:85D
 0 19859  2 0x
May 27 16:13:55 vc kernel: [ 7130.421159]  880095335568
0046 00010093cb38 880083b11b48
May 27 16:13:55 vc kernel: [ 7130.421159]  880095335fd8
880095335fd8 880095335fd8 00013f40
May 27 16:13:55 vc kernel: [ 7130.421159]  8800a1fddd00
88008b1fc5c0 880095335578 880090f736d8
May 27 16:13:55 vc kernel: [ 7130.421159] Call Trace:
May 27 16:13:55 vc kernel: [ 7130.421159]  []
schedule+0x29/0x70
May 27 16:13:55 vc kernel: [ 7130.421159]  []
btrfs_tree_lock+0xcd/0x250 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [] ?
add_wait_queue+0x60/0x60
May 27 16:13:55 vc kernel: [ 7130.421159]  []
btrfs_init_new_buffer+0x68/0x140 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  []
btrfs_alloc_free_block+0xdd/0x460 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [] ?
__set_page_dirty_nobuffers+0x1b/0x20
May 27 16:13:55 vc kernel: [ 7130.421159]  [] ?
btree_set_page_dirty+0xe/0x10 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  []
__btrfs_cow_block+0x126/0x4f0 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  []
btrfs_cow_block+0x123/0x1d0 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  []
btrfs_search_slot+0x381/0x820 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  []
lookup_inline_extent_backref+0x8e/0x5b0 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [] ?
btrfs_mark_buffer_dirty+0x99/0xf0 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [] ?
setup_inline_extent_backref+0x18e/0x290 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  []
insert_inline_extent_backref+0x63/0x130 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [] ?
btrfs_alloc_path+0x1a/0x20 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  []
__btrfs_inc_extent_ref+0x9f/0x240 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [] ?
btrfs_merge_delayed_refs+0x289/0x300 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  []
run_clustered_refs+0x971/0xd00 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [] ?
btrfs_put_tree_mod_seq+0x10d/0x150 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  []
btrfs_run_delayed_refs+0xd0/0x320 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  []
__btrfs_end_transaction+0xf7/0x410 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  []
btrfs_end_transaction+0x10/0x20 [btrfs]

As a result, the transaction cannot commit; it waits for all writers to
detach in the do-while loop.

May 27 16:13:55 vc kernel: [ 7130.419009] btrfs-transacti D
 0 15150  2 0x
May 27 16:13:55 vc kernel: [ 7130.419012]  88009f86bce8
0046 032d032d 
May 27 16:13:55 vc kernel: [ 7130.419016]  88009f86bfd8
88009f86bfd8 88009f86bfd8 00013f40
May 27 16:13:55 vc kernel: [ 7130.419020]  8800af1e9740
8800a03f8000 0090 88009693cb00
May 27 16:13:55 vc kernel: [ 7130.419023] Call Trace:
May 27 16:13:55 vc kernel: [ 7130.419027]  []
schedule+0x29/0x70
May 27 16:13:55 vc kernel: [ 7130.419031]  []
schedule_timeout+0x1ed/0x250
May 27 16:13:55 vc kernel: [ 7130.419055]  [] ?
btrfs_run_ordered_operations+0x2b3/0x2e0 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.419060]  [] ?
default_spin_lock_flags+0x9/0x10
May 27 16:13:55 vc kernel: [ 7130.419081]  []
btrfs_commit_transaction+0x3b8/0xae0 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.419085]  [] ?
add_wait_queue+0x60/0x60
May 27 16:13:55 vc kernel: [ 7130.419104]  []
transaction_kthread+0x1b5/0x230 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.419124]  [] ?
btree_invalidatepage+0x80/0x80 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.419128]  []
kthread+0xc0/0xd0
May 27 16:13:55 vc kernel: [ 7130.419132]  [] ?
flush_kthread_worker+0xb0/0xb0
May 27 16:13:55 vc kernel: [ 7130.419136]  []
ret_from_fork+0x7c/0xb0
May 27 16:13:55 vc kernel: [ 7130.419140]  [] ?
flush_kthread_worker+0xb0/0xb0

There is an additional thread stuck in btrfs_tree_lock(); I'm not sure how it
is related, perhaps there's some deadlock between the two?

May 27 16:13:55 vc kernel: [ 7130.421159] flush-btrfs-2   D
0001 0 18816  2 0x
May 27 16:13:55 vc kernel: [ 7130.421159]  88008b553948
0046 880017991050 
May 27 16:13:55 vc kernel: [ 7130.421159]  88008b553fd8
88008b553fd8 88008b553fd8 00013f40
May 27 16:13:55 vc kernel: [ 7130.421159]  880119b11740
8800af86 88008b553958 880090c9d988
May 27 16:13:55 vc kernel: [ 7130.421159] Call Trace:
May 27 16:13:55 vc kernel: [ 7130.421159]  []
schedule+0x29/0x70
May 27 16:13:55 vc kernel: [ 7130.421159]  []
btrfs_tree_lock+0xcd/0x250 [btrfs]
May 27 16:13:55 vc kernel: [ 7130.421159]  [] ?
add_wait_queue+0x60/0x60
May 27 16:13:55 vc kernel: [ 7130.421159]  []
btree_write_cache_pages+0x3bc/0x880 [btrfs]
May 27 16:13:55

[PATCH] Btrfs: merge pending IO for tree log write back

2013-05-28 Thread Miao Xie
Before this patch, we first flushed the log tree of the fs/file
tree, and then flushed the log root tree. This is inefficient,
especially on hard disks. This patch improves the situation by wrapping
the above two flushes in the same blk_plug.

In testing, the performance of sync writes went up ~60% (2.9MB/s -> 4.6MB/s)
on my SCSI disk, whose disk buffer was enabled.

Test step:
 # mkfs.btrfs -f -m single 
 # mount  
 # dd if=/dev/zero of=/file0 bs=32K count=1024 oflag=sync

Signed-off-by: Miao Xie 
---
 fs/btrfs/transaction.c |  6 +++---
 fs/btrfs/tree-log.c| 17 ++---
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 0544587..ce51603 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -736,9 +736,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
struct extent_state *cached_state = NULL;
u64 start = 0;
u64 end;
-   struct blk_plug plug;
 
-   blk_start_plug(&plug);
while (!find_first_extent_bit(dirty_pages, start, &start, &end,
  mark, &cached_state)) {
convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
@@ -752,7 +750,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
}
if (err)
werr = err;
-   blk_finish_plug(&plug);
return werr;
 }
 
@@ -797,8 +794,11 @@ int btrfs_write_and_wait_marked_extents(struct btrfs_root 
*root,
 {
int ret;
int ret2;
+   struct blk_plug plug;
 
+   blk_start_plug(&plug);
ret = btrfs_write_marked_extents(root, dirty_pages, mark);
+   blk_finish_plug(&plug);
ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
 
if (ret)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c276ac9..209d789 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -18,6 +18,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include "ctree.h"
 #include "transaction.h"
@@ -2358,6 +2359,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
struct btrfs_root *log = root->log_root;
struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
unsigned long log_transid = 0;
+   struct blk_plug plug;
 
mutex_lock(&root->log_mutex);
log_transid = root->log_transid;
@@ -2401,8 +2403,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
/* we start IO on  all the marked extents here, but we don't actually
 * wait for them until later.
 */
+   blk_start_plug(&plug);
ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
if (ret) {
+   blk_finish_plug(&plug);
btrfs_abort_transaction(trans, root, ret);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&root->log_mutex);
@@ -2437,6 +2441,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
}
 
if (ret) {
+   blk_finish_plug(&plug);
if (ret != -ENOSPC) {
btrfs_abort_transaction(trans, root, ret);
mutex_unlock(&log_root_tree->log_mutex);
@@ -2452,6 +2457,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
index2 = log_root_tree->log_transid % 2;
if (atomic_read(&log_root_tree->log_commit[index2])) {
+   blk_finish_plug(&plug);
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
wait_log_commit(trans, log_root_tree,
log_root_tree->log_transid);
@@ -2474,6 +2480,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 * check the full commit flag again
 */
if (root->fs_info->last_trans_log_full_commit == trans->transid) {
+   blk_finish_plug(&plug);
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
@@ -2481,9 +2488,10 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out_wake_log_root;
}
 
-   ret = btrfs_write_and_wait_marked_extents(log_root_tree,
-   &log_root_tree->dirty_log_pages,
-   EXTENT_DIRTY | EXTENT_NEW);
+   ret = btrfs_write_marked_extents(log_root_tree,
+&log_root_tree->dirty_log_pages,
+EXTENT_DIRTY | EXTENT_NEW);
+   blk_finish_plug(&plug);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
btrfs_free_logged_extents(log, log_transid);
@@ -2491,6 +2499,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out_wake_log_root;
}
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+