Re: [PATCH 10/15] btrfs-progs: fix qgroup realloc inheritance

2013-08-18 Thread Arne Jansen
On 08/15/13 01:16, Zach Brown wrote:
 qgroup.c:82:23: warning: memcpy with byte count of 0
 qgroup.c:83:23: warning: memcpy with byte count of 0
 
 The inheritance wasn't copying qgroups[] because a confused sizeof()
 gave 0 byte memcpy()s.  It's been like this for the year since it was
 merged, so I guess this isn't a very important thing to do :).

It only seems to hit if you give -[cx] before -i. I guess only very
few people use these options in the first place. They are primarily
for hosting providers.

Reviewed-by: Arne Jansen sensi...@gmx.net
 
 Signed-off-by: Zach Brown z...@redhat.com
 ---
  qgroup.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/qgroup.c b/qgroup.c
 index 038c4dc..86fe2b2 100644
 --- a/qgroup.c
 +++ b/qgroup.c
 @@ -74,7 +74,7 @@ qgroup_inherit_realloc(struct btrfs_qgroup_inherit 
 **inherit, int n, int pos)
  
   if (*inherit) {
   struct btrfs_qgroup_inherit *i = *inherit;
 - int s = sizeof(out->qgroups);
 + int s = sizeof(out->qgroups[0]);
  
   out->num_qgroups = i->num_qgroups;
   out->num_ref_copies = i->num_ref_copies;
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] Btrfs: fix oops when writing dirty qgroups to disk

2013-08-09 Thread Arne Jansen
On 07.08.2013 07:12, Wang Shilong wrote:
 When disabling quota, we should clear out the list 'dirty_qgroups'; otherwise,
 we will get an oops when enabling quota again. Fix this by factoring out the
 shared code from del_qgroup_rb().
 
 Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/qgroup.c | 43 ++-
  1 file changed, 14 insertions(+), 29 deletions(-)
 
 diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
 index 64a9e3c..3b103e2 100644
 --- a/fs/btrfs/qgroup.c
 +++ b/fs/btrfs/qgroup.c
 @@ -157,18 +157,11 @@ static struct btrfs_qgroup *add_qgroup_rb(struct 
 btrfs_fs_info *fs_info,
   return qgroup;
  }
  
 -/* must be called with qgroup_lock held */
 -static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
 +static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
  {
 - struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
 - struct btrfs_qgroup_list *list;
 + struct btrfs_qgroup_list *list = NULL;

Why do you initialize list to NULL here? It's always assigned
before used.

otherwise,
Reviewed-by: Arne Jansen sensi...@gmx.net

  
 - if (!qgroup)
 - return -ENOENT;
 -
 - rb_erase(qgroup-node, fs_info-qgroup_tree);
   list_del(qgroup-dirty);
 -
   while (!list_empty(qgroup-groups)) {
   list = list_first_entry(qgroup-groups,
   struct btrfs_qgroup_list, next_group);
 @@ -185,7 +178,18 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, 
 u64 qgroupid)
   kfree(list);
   }
   kfree(qgroup);
 +}
 +
 +/* must be called with qgroup_lock held */
 +static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
 +{
 + struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
  
 + if (!qgroup)
 + return -ENOENT;
 +
 + rb_erase(qgroup-node, fs_info-qgroup_tree);
 + __del_qgroup_rb(qgroup);
   return 0;
  }
  
 @@ -435,30 +439,11 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info 
 *fs_info)
  {
   struct rb_node *n;
   struct btrfs_qgroup *qgroup;
 - struct btrfs_qgroup_list *list;
  
   while ((n = rb_first(fs_info-qgroup_tree))) {
   qgroup = rb_entry(n, struct btrfs_qgroup, node);
   rb_erase(n, fs_info-qgroup_tree);
 -
 - while (!list_empty(qgroup-groups)) {
 - list = list_first_entry(qgroup-groups,
 - struct btrfs_qgroup_list,
 - next_group);
 - list_del(list-next_group);
 - list_del(list-next_member);
 - kfree(list);
 - }
 -
 - while (!list_empty(qgroup-members)) {
 - list = list_first_entry(qgroup-members,
 - struct btrfs_qgroup_list,
 - next_member);
 - list_del(list-next_group);
 - list_del(list-next_member);
 - kfree(list);
 - }
 - kfree(qgroup);
 + __del_qgroup_rb(qgroup);
   }
   /*
* we call btrfs_free_qgroup_config() when umounting

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] Btrfs: remove reduplicate check when disabling quota

2013-08-09 Thread Arne Jansen
On 07.08.2013 07:12, Wang Shilong wrote:
 We have already checked 'quota_root' with qgroup_ioctl_lock held, so
 the check here is redundant; remove it.
 
 Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/qgroup.c | 5 -
  1 file changed, 5 deletions(-)
 
 diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
 index b809616..df2841d 100644
 --- a/fs/btrfs/qgroup.c
 +++ b/fs/btrfs/qgroup.c
 @@ -943,11 +943,6 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
  
   btrfs_free_qgroup_config(fs_info);
  
 - if (!quota_root) {
 - ret = -EINVAL;
 - goto out;
 - }
 -
   ret = btrfs_clean_quota_tree(trans, quota_root);
   if (ret)
   goto out;

Reviewed-by: Arne Jansen sensi...@gmx.net
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 6/7] btrfs: cleanup: removed unused 'btrfs_reada_detach'

2013-08-08 Thread Arne Jansen
On 07.08.2013 23:43, Sergei Trofimovich wrote:
 From: Sergei Trofimovich sly...@gentoo.org
 
 Found by uselex.rb:
 btrfs_reada_detach: [R]: exported from: fs/btrfs/btrfs.o fs/btrfs/built-in.o 
 fs/btrfs/reada.o

Even though the function is currently unused, I'm hesitant to remove it,
as it's part of the reada API and might be handy for anyone who wants to use
the API in the future.

-Arne

 
 Signed-off-by: Sergei Trofimovich sly...@gentoo.org
 ---
  fs/btrfs/ctree.h | 1 -
  fs/btrfs/reada.c | 9 +
  2 files changed, 1 insertion(+), 9 deletions(-)
 
 diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
 index e91ab9e..f35e086 100644
 --- a/fs/btrfs/ctree.h
 +++ b/fs/btrfs/ctree.h
 @@ -3861,7 +3861,6 @@ struct reada_control {
  struct reada_control *btrfs_reada_add(struct btrfs_root *root,
 struct btrfs_key *start, struct btrfs_key *end);
  int btrfs_reada_wait(void *handle);
 -void btrfs_reada_detach(void *handle);
  int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err);
  
 diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
 index 1031b69..c41d470 100644
 --- a/fs/btrfs/reada.c
 +++ b/fs/btrfs/reada.c
 @@ -37,7 +37,7 @@
   * To trigger a readahead, btrfs_reada_add must be called. It will start
   * a read ahead for the given range [start, end) on tree root. The returned
   * handle can either be used to wait on the readahead to finish
 - * (btrfs_reada_wait), or to send it to the background (btrfs_reada_detach).
 + * (btrfs_reada_wait).
   *
   * The read ahead works as follows:
   * On btrfs_reada_add, the root of the tree is inserted into a radix_tree.
 @@ -979,10 +979,3 @@ int btrfs_reada_wait(void *handle)
   return 0;
  }
  #endif
 -
 -void btrfs_reada_detach(void *handle)
 -{
 - struct reada_control *rc = handle;
 -
 - kref_put(rc-refcnt, reada_control_release);
 -}

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 6/7] btrfs: cleanup: removed unused 'btrfs_reada_detach'

2013-08-08 Thread Arne Jansen
On 08/08/13 19:46, Zach Brown wrote:
 even though the function is currently unused, I'm hesitating to remove it
 as it's part of the reada-API and might be handy for anyone going to use
 the API in the future.

 I agree. As replied here,
 http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg24047.html
 please keep the function.
 
 If we're keeping score, put me down for being in favour of removing dead
 untested code.  git ressurection is easy. 

It's not really untested; it was in use some time ago. But of
course there's a chance that some changes broke it.
Yes, git resurrection is easy. To inform potential users, you might
just leave a comment like this:

/*
 * There has been a function once to detach from a running reada.
 * If you need such functionality, just revert the commit that
 * added this comment.
 */

-Arne

 
 - z
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Heavy memory leak when using quota groups

2013-08-05 Thread Arne Jansen
On 05.08.2013 18:35, Tomasz Chmielewski wrote:
 I am trying to use qgroups
 functionality  with a basic random-write workload, it constantly
 keeps leaking memory  within few minutes of IO, there is either
 out-of-memory killer trying to kill some tasks or there are
 page-allocation failures that btrfs or other kernel module
 experiences.
 
 FYI, I just saw something similar with 3.10 on a server with 32 GB RAM:
 
 The result was a frozen server and a need to hard reset.
 

What do I have to do to reproduce it here? How do you generate the load?
What is the disk setup, and what is the qgroups setup?

Thanks,
Arne
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs quota examples?

2013-08-02 Thread Arne Jansen
On 07/29/13 10:05, Tomasz Chmielewski wrote:
 On Mon, 10 Jun 2013 09:41:39 +0200
 Arne Jansen sensi...@gmx.net wrote:
 
 Now, my questions:

 - what do both 104882176 104882176 numbers represent?

 The first number represents the amount of data in that subvolume,
 regardless whether that data is shared with other subvolumes or
 not.
 The second number shows the amount of data that is unique to this
 subvolume and not shared with others, i.e. the amount of space
 that will get freed if you delete this subvolume.
 
 I've played with qgroups for some time, but the results are rather
 inconsistent.
 
 I.e. here - what does a negative number represent in 0/1181 row?

Either you have turned on qgroups after filling the subvol with
some data, or you've managed to produce a tracking error. In the
latter case, we would be interested in how you did that and how to
reproduce it.

-Arne

 
 # btrfs qgroup show /mnt/lxc2
 0/260 151490953216 151490953216
 0/261 180969472 180969472
 0/262 17888 983040
 0/377 180310016 25776128
 0/378 304088072192 304088072192
 0/535 571944960 417370112
 0/536 68550987776 68550987776
 0/642 247463936 92921856
 0/1175 617213952 827392
 0/1181 16112013312 -22184235008
 0/1268 38296248320 0
 0/1269 616386560 0
 0/1270 4096 4096
 0/1271 4096 4096
 
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs qgroup assign - ERROR: bad relation requested

2013-08-02 Thread Arne Jansen
On 07/31/13 07:39, Tomasz Chmielewski wrote:
 On Wed, 31 Jul 2013 13:13:37 +0800
 Wang Shilong wangsl.f...@cn.fujitsu.com wrote:
 
 # git pull origin master

 Oops, i am sorry, here should:

 git pull origin qgroup

 would you please try it again
 
 Excellent, it works:
 
 # btrfs qgroup show -p /mnt/lxc2
 0/260 154273873920 154273873920 ---
 0/261 181260288 181260288 ---
 0/262 17888 983040 ---
 0/377 180539392 26005504 ---
 0/378 308607238144 308607238144 ---
 0/535 572407808 417832960 ---
 0/536 68085026816 68085026816 ---
 0/642 247824384 93282304 ---
 0/1276 636026880 4096 1/1
 0/1277 38301687808 4096 1/1
 0/1278 617164800 32768 1/1
 0/1279 38297591808 4096 1/1
 0/1284 38299160576 0 1/1
 0/1285 38299160576 21364736 1/1
 0/1286 620396544 0 1/1
 0/1287 633090048 12722176 1/1
 0/1293 38301687808 4096 1/1
 0/1294 636026880 4096 1/1
 1/1 38972137472 38972137472 ---
 
 
 Thanks a lot!
 
 
 Am I correct to think that I have to run quota rescan after
 assigning groups?
 
 # btrfs qgroup assign 378 1/1 /mnt/lxc2  - assigning volid 378, having 
 300+ GB
 
 # btrfs qgroup show -p /mnt/lxc2 | grep 1/1
 0/378 308607238144 308607238144 1/1
 0/1276 636026880 4096 1/1
 0/1277 38301687808 4096 1/1
 0/1278 617164800 32768 1/1
 0/1279 38297591808 4096 1/1
 0/1284 38299160576 0 1/1
 0/1285 38299160576 21364736 1/1
 0/1286 620396544 0 1/1
 0/1287 633090048 12722176 1/1
 0/1293 38301687808 4096 1/1
 0/1294 636026880 4096 1/1
 1/1 38972137472 38972137472 ---  - although we've added 300+ GB volume, 
 the total numbers are unchanged
 
 
 That can be quite a bit of IO?

To avoid the rescan you can do the assignment atomically with
the creation of the subvol.
Basically, if you lay out your future quota/subvol/snapshot structure when
you create your filesystem, you never have to do a rescan. The
commands are all there for that, but it's not really easy to do
if you have a complex setup. That part needs better documentation
and more examples, and probably also a better UI.
Only if you change your mind later on will you probably have to rescan.

-Arne

 
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs quota examples?

2013-06-10 Thread Arne Jansen
On 10.06.2013 08:21, Tomasz Chmielewski wrote:
 I'm trying to use btrfs quotas, but I'm unable to find reliable
 documentation on its usage.
 
 Let's start with an empty subvolume:
 
 - assign 200 MB space to it:
 
 # btrfs qgroup limit 200m /mnt/btrfs-backup/tester/
 
 
 - see the usage:
 
 # btrfs qgroup show /mnt/btrfs-backup/tester/
 0/803 0 0
 
 
 - let's add a 100 MB file:
 
 # dd if=/dev/urandom of=bigfile bs=1M count=100
 
 
 - sync the FS to make sure quota is updated:
 
 # btrfs filesystem
 sync /mnt/btrfs-backup/ FSSync '/mnt/btrfs-backup/'
 
 
 - see current quota usage:
 
 # btrfs qgroup show /mnt/btrfs-backup/backuppc/tester/
 0/803 104882176 104882176
 
 
 Now, my questions:
 
 - what do both 104882176 104882176 numbers represent?

The first number represents the amount of data in that subvolume,
regardless whether that data is shared with other subvolumes or
not.
The second number shows the amount of data that is unique to this
subvolume and not shared with others, i.e. the amount of space
that will get freed if you delete this subvolume.
For how this extends to quota groups see http://sensille.com/qgroups.pdf

 
 - how can I verify the quota assigned to any given subvolume? I've assigned 
 200 MB, but how can I verify that number?

The original patch set didn't include commands for that, but I
think I've seen patches for it on the mailing list, though I
can't find them right now.

-Arne

 
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] xfstests btrfs/314: test send / receive

2013-06-07 Thread Arne Jansen
On 07.06.2013 16:50, Eric Sandeen wrote:
 On 6/7/13 5:29 AM, Dave Chinner wrote:
 On Fri, Jun 07, 2013 at 09:18:58AM +0200, Jan Schmidt wrote:
 (cc Arne for far-progs discussion)

 On Thu, June 06, 2013 at 19:54 (+0200), Eric Sandeen wrote:
 On 6/6/13 10:20 AM, Jan Schmidt wrote:
 Basic send / receive functionality test for btrfs. Requires current
 version of fsstress built (-x support). Relies on fssum tool, which is
 not part of the test suite but can skip the test if it is missing.

 Signed-off-by: Jan Schmidt list@jan-o-sch.net

 w/o commenting on the test itself, I'm a little uneasy about requiring
 some external, not-widely-installed tool for this to run.  The fear is
 that it won't be run as often as it could/should be.

 The main purpose is to have it run by developers changing something around 
 btrfs
 send / receive and probably the backref walker (while there exists a 
 separate
 test not requiring fssum for backrefs). I think we can get them to install 
 fssum.

 There's no point in having tests that require you to go find
 something else before the tests can be run. That's been tried
 before, and it doesn't work - the test just won't get run by
 the majority of people who run xfstests.

 Could the same test be done w/o fssum, or should we maybe put a copy
 of fssum into xfstests/src/fssum.c ?

 I don't know any adequate replacement for fssum in this case. The purpose 
 is to
 build a checksum for a whole file system tree, including data and partly 
 metadata.

 I don't feel like copying fssum from far-progs into xfstests, though it 
 probably
 won't hurt much. However, I cannot promise we won't make changes to it for
 far-progs, probably creating two incompatible versions of fssum in the 
 wild. Arne?

 Or does fssum exist in any standard distro package?

 It doesn't. Perhaps Josef can hurry and make a Fedora package for it, if 
 that
 prevents a separate copy to xfstests :-)

 No, it doesn't. Packages would be needed for debian, suse, SLES,
 RHEL, etc for that to be a useful method of distribution. Just dump
 a snapshot of the utility in the xfstests src dir so we don't have
 to care about distribution issues...
 
 Yup I agree with this, if it's not widely available or replaceable by more
 common tools, let's just put a snapshot in xfstests.

I'm fine with that, too.

-Arne

 
 -Eric
 
 Cheers,

 Dave.

 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/2] introduce list_for_each_entry_del

2013-06-05 Thread Arne Jansen
On 05.06.2013 04:09, Jörn Engel wrote:
 On Tue, 4 June 2013 14:44:35 -0400, Jörn Engel wrote:

 Or while_list_drain?

I'm fine with while_list_drain, although a name starting with list_
like all other list macros would be nice. How about just list_drain?
The next question is where to put it in the header so that anyone
doing list cleanup stumbles upon it. Maybe directly below list_del?

-Arne

 
 Not sure if the silence is approval or lack of interest, but a new set
 of patches is posted.  By playing around with the implementation a
 bit, I have actually found a variant that makes the object code
 shrink.  Not one variant gave same-size object code.  There's compiler
 optimization for you.
 
 Jörn
 
 --
 Money can buy bandwidth, but latency is forever.
 -- John R. Mashey

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/2] introduce list_for_each_entry_del

2013-06-04 Thread Arne Jansen
On 06/04/13 16:53, Chris Mason wrote:
 Quoting Christoph Hellwig (2013-06-04 10:48:56)
 On Mon, Jun 03, 2013 at 03:55:55PM -0400, Jörn Engel wrote:
 Actually, when I compare the two invocations, I prefer the
 list_for_each_entry_del() variant over list_pop_entry().

 while ((ref = list_pop_entry(prefs, struct __prelim_ref, list))) {
 list_for_each_entry_del(ref, prefs, list) {

 Christoph?

 I really don't like something that looks like an iterator (*for_each*)
 to modify a list.  Maybe it's just me, so I'd love to hear others chime
 in.
 
 Have to agree with Christoph.  I just couldn't put my finger on why I
 didn't like it until I saw the list_pop_entry suggestion.

list_pop_each_entry?

 
 -chris
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: `btrfs receive` almost coming to a halt

2013-05-10 Thread Arne Jansen
On 09.05.2013 17:14, Remco Hosman - Yerf IT wrote:
 kernel: 3.9.0
 btrfs-progs: pulled from git this morning
 
 Trying to receive a 5gig send file. the first bit is fast, doing 10 - 
 50MB/sec.
 then it slows down. cpu usage is 50% (dual core machine).
 when i do a strace, it looks like this, repeating over an over, about 1 piece 
 each second:
 --
 read(3, q\0\0\0\20\0008\352\327o, 10) = 10
 read(3, 
 \22\0\10\0\0\0$~\0\0\0\0\30\0\10\0\0\0\2\0\0\0\0\0\17\0\24\0DB2/..., 113) = 
 113
 open(/media/snaps/yerf-2013-05-02-03:15:01/DB2/DB2-flat.vmdk, 
 O_RDONLY|O_NOATIME) = 6
 ioctl(5, 0x4020940d, 0x7fffc6d41c60)= 0
 close(6)= 0
 read(3, q\0\0\0\20\0\242\357\263, 10) = 10
 read(3, 
 \22\0\10\0\0\0~\0\0\0\0\30\0\10\0\0\0\2\0\0\0\0\0\17\0\24\0DB2/..., 113) = 
 113
 open(/media/snaps/yerf-2013-05-02-03:15:01/DB2/DB2-flat.vmdk, 
 O_RDONLY|O_NOATIME) = 6
 ioctl(5, 0x4020940d, 0x7fffc6d41c60)= 0
 close(6)= 0
 --
 

Is this the receive side?
Where does the data come from, a local file or via network?

-Arne

 it pauses for a second after ioctl(5, 0x4020940d
 it has been running like that for 3 hours now.
 the file its working is large (80gig) and filefrag reports 648862 extends.
 filesystem is mounted with rw,relatime,compress-force=lzo,space_cache
 
 anything i can do to see what the problem is?
 
 Remco--
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs: make static code static remove dead code

2013-04-20 Thread Arne Jansen
On 04/19/13 21:21, Eric Sandeen wrote:
 Big patch, but all it does is add statics to functions which
 are in fact static, then remove the associated dead-code fallout.
 
 removed functions:
 
 btrfs_iref_to_path()
 __btrfs_lookup_delayed_deletion_item()
 __btrfs_search_delayed_insertion_item()
 __btrfs_search_delayed_deletion_item()
 find_eb_for_page()
 btrfs_find_block_group()
 range_straddles_pages()
 extent_range_uptodate()
 btrfs_file_extent_length()
 btrfs_reada_detach()
 btrfs_scrub_cancel_devid()
 btrfs_start_transaction_lflush()
 
 btrfs_print_tree() is left because it is used for debugging.
 ulist.c functions are left because that one is odd; they're exported?

I originally wrote them to be included under lib/, but got no response
on LKML, so I added them only to btrfs and forgot to remove the exports.
I think it would still make sense to move them to lib/, as this data
structure can be quite useful from time to time.

-Arne

 
 Signed-off-by: Eric Sandeen sand...@redhat.com
 ---
 
 diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
 index bd605c8..cb8acab 100644
 --- a/fs/btrfs/backref.c
 +++ b/fs/btrfs/backref.c
 @@ -1249,32 +1249,6 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, 
 struct btrfs_path *path,
  }
  
  /*
 - * this iterates to turn a btrfs_inode_ref into a full filesystem path. 
 elements
 - * of the path are separated by '/' and the path is guaranteed to be
 - * 0-terminated. the path is only given within the current file system.
 - * Therefore, it never starts with a '/'. the caller is responsible to 
 provide
 - * size bytes in dest. the dest buffer will be filled backwards. finally,
 - * the start point of the resulting string is returned. this pointer is 
 within
 - * dest, normally.
 - * in case the path buffer would overflow, the pointer is decremented further
 - * as if output was written to the buffer, though no more output is actually
 - * generated. that way, the caller can determine how much space would be
 - * required for the path to fit into the buffer. in that case, the returned
 - * value will be smaller than dest. callers must check this!
 - */
 -char *btrfs_iref_to_path(struct btrfs_root *fs_root,
 -  struct btrfs_path *path,
 -  struct btrfs_inode_ref *iref,
 -  struct extent_buffer *eb_in, u64 parent,
 -  char *dest, u32 size)
 -{
 - return btrfs_ref_to_path(fs_root, path,
 -  btrfs_inode_ref_name_len(eb_in, iref),
 -  (unsigned long)(iref + 1),
 -  eb_in, parent, dest, size);
 -}
 -
 -/*
   * this makes the path point to (logical EXTENT_ITEM *)
   * returns BTRFS_EXTENT_FLAG_DATA for data, BTRFS_EXTENT_FLAG_TREE_BLOCK for
   * tree blocks and 0 on error.
 diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
 index 310a7f6..0f446d7 100644
 --- a/fs/btrfs/backref.h
 +++ b/fs/btrfs/backref.h
 @@ -59,9 +59,6 @@ int paths_from_inode(u64 inum, struct inode_fs_paths 
 *ipath);
  int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
   struct btrfs_fs_info *fs_info, u64 bytenr,
   u64 time_seq, struct ulist **roots);
 -char *btrfs_iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 -  struct btrfs_inode_ref *iref, struct extent_buffer *eb,
 -  u64 parent, char *dest, u32 size);
  char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
   u32 name_len, unsigned long name_off,
   struct extent_buffer *eb_in, u64 parent,
 diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
 index 15b9408..e139bbf 100644
 --- a/fs/btrfs/compression.c
 +++ b/fs/btrfs/compression.c
 @@ -82,6 +82,10 @@ struct compressed_bio {
   u32 sums;
  };
  
 +static int btrfs_decompress_biovec(int type, struct page **pages_in,
 +u64 disk_start, struct bio_vec *bvec,
 +int vcnt, size_t srclen);
 +
  static inline int compressed_bio_size(struct btrfs_root *root,
 unsigned long disk_size)
  {
 @@ -739,7 +743,7 @@ static int comp_num_workspace[BTRFS_COMPRESS_TYPES];
  static atomic_t comp_alloc_workspace[BTRFS_COMPRESS_TYPES];
  static wait_queue_head_t comp_workspace_wait[BTRFS_COMPRESS_TYPES];
  
 -struct btrfs_compress_op *btrfs_compress_op[] = {
 +static struct btrfs_compress_op *btrfs_compress_op[] = {
   btrfs_zlib_compress,
   btrfs_lzo_compress,
  };
 @@ -910,8 +914,9 @@ int btrfs_compress_pages(int type, struct address_space 
 *mapping,
   * be contiguous.  They all correspond to the range of bytes covered by
   * the compressed extent.
   */
 -int btrfs_decompress_biovec(int type, struct page **pages_in, u64 disk_start,
 - struct bio_vec *bvec, int vcnt, size_t srclen)
 +static int 

Re: [PATCH] Btrfs: make ulist_{init,fin}() static and not exported as symbol

2013-04-20 Thread Arne Jansen
On 04/20/13 19:43, Eric Sandeen wrote:
 On 4/20/13 7:43 AM, Wang Shilong wrote:
 From: Wang Shilong wangsl-f...@cn.fujitsu.com

 ulist is a generic structure to iterate tree. we export them
 just in the hope it may be used by other modules. However, until
 now, it is just used by btrfs. The main point is that ulist_{init,fin}()
 should only be called by ulist_{alloc,reinit,free}(). So we don't need
 to export them, and making them static is the best choice.
 
 It actually seems like all EXPORT_SYMBOLS should be removed, no?
 I can't imagine that btrfs needs to be exporting any of these
 functions to any other module...

Let's see to it that we move ulists to lib/ for 3.10.

-Arne

 
 Thanks,
 -Eric
 
 Signed-off-by: Wang Shilong wangsl-f...@cn.fujitsu.com
 ---
  fs/btrfs/ulist.c | 6 ++
  fs/btrfs/ulist.h | 2 --
  2 files changed, 2 insertions(+), 6 deletions(-)

 diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
 index ddc61ca..50fc1c4 100644
 --- a/fs/btrfs/ulist.c
 +++ b/fs/btrfs/ulist.c
 @@ -48,13 +48,12 @@
   * Note: don't use this function to init an already used ulist, use
   * ulist_reinit instead.
   */
 -void ulist_init(struct ulist *ulist)
 +static void ulist_init(struct ulist *ulist)
  {
  ulist-nnodes = 0;
  ulist-nodes = ulist-int_nodes;
  ulist-nodes_alloced = ULIST_SIZE;
  }
 -EXPORT_SYMBOL(ulist_init);
  
  /**
   * ulist_fini - free up additionally allocated memory for the ulist
 @@ -63,7 +62,7 @@ EXPORT_SYMBOL(ulist_init);
   * This is useful in cases where the base 'struct ulist' has been statically
   * allocated.
   */
 -void ulist_fini(struct ulist *ulist)
 +static void ulist_fini(struct ulist *ulist)
  {
  /*
   * The first ULIST_SIZE elements are stored inline in struct ulist.
 @@ -73,7 +72,6 @@ void ulist_fini(struct ulist *ulist)
  kfree(ulist-nodes);
  ulist-nodes_alloced = 0;   /* in case ulist_fini is called twice */
  }
 -EXPORT_SYMBOL(ulist_fini);
  
  /**
   * ulist_reinit - prepare a ulist for reuse
 diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
 index 21a1963..e05db47 100644
 --- a/fs/btrfs/ulist.h
 +++ b/fs/btrfs/ulist.h
 @@ -60,8 +60,6 @@ struct ulist {
  struct ulist_node int_nodes[ULIST_SIZE];
  };
  
 -void ulist_init(struct ulist *ulist);
 -void ulist_fini(struct ulist *ulist);
  void ulist_reinit(struct ulist *ulist);
  struct ulist *ulist_alloc(gfp_t gfp_mask);
  void ulist_free(struct ulist *ulist);

 
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RFC] Btrfs: fix confusing edquot happening case

2013-04-15 Thread Arne Jansen
On 15.04.2013 12:37, Wang Shilong wrote:
 Step to reproduce:
   mkfs.btrfs disk
   mount disk mnt
   dd if=/dev/zero of=/mnt/data bs=1M count=10
   sync
   btrfs quota enable mnt
   btrfs qgroup create 0/5 mnt
   btrfs qgroup limit 5M 0/5 mnt
   rm -f /mnt/data
   sync
   btrfs qgroup show mnt
   dd if=/dev/zero of=data bs=1M count=1
 
 From the perspective of users, qgroup's referenced or referenced
 
 is negative,But user can not continue to write data! a workaround
 way is to cast u64 to int64 when doing qgroup reservation.
 
 Signed-off-by: Wang Shilong wangsl-f...@cn.fujitsu.com
 ---
 This confusing edquot may also happen after Jan's qgroup
 rescan has been implemented.
 ---
  fs/btrfs/qgroup.c |4 ++--
  1 files changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
 index b44124d..0178223 100644
 --- a/fs/btrfs/qgroup.c
 +++ b/fs/btrfs/qgroup.c
 @@ -1523,14 +1523,14 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 
 num_bytes)
   qg = (struct btrfs_qgroup *)(uintptr_t)unode-aux;
  
   if ((qg-lim_flags  BTRFS_QGROUP_LIMIT_MAX_RFER) 
 - qg-reserved + qg-rfer + num_bytes 
 + qg-reserved + (signed long long)qg-rfer + num_bytes 

Why not use s64 instead of signed long long? Otherwise, this is the right way to
solve this.

Thanks,
Arne

   qg-max_rfer) {
   ret = -EDQUOT;
   goto out;
   }
  
   if ((qg-lim_flags  BTRFS_QGROUP_LIMIT_MAX_EXCL) 
 - qg-reserved + qg-excl + num_bytes 
 + qg-reserved + (signed long long)qg-excl + num_bytes 
   qg-max_excl) {
   ret = -EDQUOT;
   goto out;

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RFC] Btrfs: fix confusing edquot happening case

2013-04-15 Thread Arne Jansen
On 15.04.2013 13:43, Wang Shilong wrote:
 Hello Arne,
 
 On 15.04.2013 12:37, Wang Shilong wrote:
 Step to reproduce:
 mkfs.btrfs disk
 mount disk mnt
 dd if=/dev/zero of=/mnt/data bs=1M count=10
 sync
 btrfs quota enable mnt
 btrfs qgroup create 0/5 mnt
 btrfs qgroup limit 5M 0/5 mnt
 rm -f /mnt/data
 sync
 btrfs qgroup show mnt
 dd if=/dev/zero of=data bs=1M count=1

 From the perspective of users, qgroup's referenced or referenced

 is negative,But user can not continue to write data! a workaround
 way is to cast u64 to int64 when doing qgroup reservation.

 Signed-off-by: Wang Shilong wangsl-f...@cn.fujitsu.com
 ---
 This confusing edquot may also happen after Jan's qgroup
 rescan has been implemented.
 ---
 fs/btrfs/qgroup.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

 diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
 index b44124d..0178223 100644
 --- a/fs/btrfs/qgroup.c
 +++ b/fs/btrfs/qgroup.c
 @@ -1523,14 +1523,14 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, 
 u64 num_bytes)
 qg = (struct btrfs_qgroup *)(uintptr_t)unode-aux;

 if ((qg-lim_flags  BTRFS_QGROUP_LIMIT_MAX_RFER) 
 -   qg-reserved + qg-rfer + num_bytes 
 +   qg-reserved + (signed long long)qg-rfer + num_bytes 

 why not use s64 instead of signed long long? Otherwise this is the right way 
 to
 solve this.
 
 Yeah,V2 is coming. By the way, do you mind that i add Acked-by: Arne Jasen 
 sensi...@gmx.net?

You can add a Reviewed-by: Arne Jansen sensi...@gmx.net

Thanks,
Arne

 
 Thanks,
 Wang

 Thanks,
 Arne

 qg-max_rfer) {
 ret = -EDQUOT;
 goto out;
 }

 if ((qg-lim_flags  BTRFS_QGROUP_LIMIT_MAX_EXCL) 
 -   qg-reserved + qg-excl + num_bytes 
 +   qg-reserved + (signed long long)qg-excl + num_bytes 
 qg-max_excl) {
 ret = -EDQUOT;
 goto out;

 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG?] Btrfs quota: overwritten space is counted twice

2013-04-06 Thread Arne Jansen
On 04/05/13 23:03, Koen De Wit wrote:
 When data in a file is overwritten, starting somewhere in the middle of
 the file, the overwritten space is counted twice against the space usage
 numbers. Is this a bug, or did I do something wrong?

Neither. This is a result of how btrfs allocates and overwrites extents.
Your file is first allocated as one extent of 1000kB. When you
overwrite parts of it, the original extent is kept, and a new extent for
the overwritten part is allocated, resulting in another allocation of
1000kB. Once you overwrite the first 100kB (the part that is still in
use from the first extent), the original extent should be freed.
So your file really allocates 2000kB on disk, and qgroups accounts for
it correctly. You can verify it with btrfs-debug-tree.

-Arne

 
 This is what I did:
 
 I create a subvolume and limit it to 4 MB, and create a 1000 KB file in
 the subvol:
 
 # btrfs subvol create s
   Create subvolume './s'
 # btrfs qgroup limit 4m s
 # btrfs qgroup show ./ | grep 260
   0/260 4096 4096
 # dd if=/dev/zero of=s/file bs=1024 count=1000; sync
 # ls -lah s/file
   -rw-r--r--. 1 root root 1000K Apr  6 00:13 s/file
 # btrfs qgroup show ./ | grep 260
   0/260 1028096 1028096
 
 Then I overwrite the last 900 KB of the file, and add 100 KB of data,
 resulting in a 1.1 MB file. The space usage numbers shows 2 MB however:
 
 # dd if=/dev/zero of=s/file bs=1024 count=1000 seek=100; sync
 # ls -lah s/file
   -rw-r--r--. 1 root root 1.1M Apr  6 00:13 s/file
 # btrfs qgroup show ./ | grep 260
   0/260 2052096 2052096
 
 I repeat this twice, the file becomes 1.3 MB but the usage number goes
 to almost 4 MB:
 
 # dd if=/dev/zero of=s/file bs=1024 count=1000 seek=200; sync
 # dd if=/dev/zero of=s/file bs=1024 count=1000 seek=300; sync
 # ls -lah s/file
   -rw-r--r--. 1 root root 1.3M Apr  6 00:14 s/file
 # btrfs qgroup show ./ | grep 260
   0/260 4100096 4100096
 
 Doing the same again results in quota exceeded errors:
 
 # dd if=/dev/zero of=s/file bs=1024 count=1000 seek=400; sync
   dd: writing `s/file': Disk quota exceeded
   78+0 records in
   77+0 records out
   78848 bytes (79 kB) copied, 0.00138135 s, 57.1 MB/s
 # ls -lah s/file
   -rw-r--r--. 1 root root 477K Apr  6 00:15 s/file
 # btrfs qgroup show ./ | grep 260
   0/260 4182016 4182016
 # touch s/emptyfile
   touch: cannot touch `s/emptyfile': Disk quota exceeded
 
 Koen.
 -- 
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] btrfs quota

2013-03-31 Thread Arne Jansen
Hello Wang,

On 03/31/13 03:44, Wang Shilong wrote:
 Hello Arne, Jan,
 
  When i run stresstest with quota enabled, i hit a panic where
 btrfs_find_all_roots() is called. Would you please double check it.
 

On what kernel version do you hit this (git commit id preferred)?
Can you send us the message from dmesg?

Thanks,
arne

  stresstest -n 1  -t 4.
 
 Maybe there is still race condition when walking backref  trees, Jan
 would you please check the backref.c ..
 btrfs_find_all_roots() is called in btrfs_qgroup_account_ref () to find
 all roots that reference a extent. I think this is called because
 
 we introduce referenced/exclusive concepts. The point is that i think
  this concept's disadvantages outweigh the advantages:
   1
 walking backref  tree is really time-consuming.
 2 referenced/exclusive makes it really complex when snapshot happens.
 3 quota has a high coupling with other module.
  I'd prefer to just keep the referenced value; it may not be elegant, but
 it helps more.
 
 Thanks,
 Wang
 
 
 
 
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: cleanup to remove reduplicate code in iterate_extent_inode()

2013-03-30 Thread Arne Jansen
On 03/30/13 12:55, Wang Shilong wrote:
 snip
 
 On 03/29/13 14:42, Wang Shilong wrote:
 From: Wang Shilong wangsl-f...@cn.fujitsu.com

 Just remove the unnecessary check and assignment.

 Signed-off-by: Wang Shilong wangsl-f...@cn.fujitsu.com
 ---
 fs/btrfs/backref.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

 diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
 index 3ca413bb..e102b48 100644
 --- a/fs/btrfs/backref.c
 +++ b/fs/btrfs/backref.c
 @@ -1499,7 +1499,7 @@ int iterate_extent_inodes(struct btrfs_fs_info 
 *fs_info,
 if (ret)
 break;
 ULIST_ITER_INIT(root_uiter);
 -   while (!ret  (root_node = ulist_next(roots, root_uiter))) {
 +   while ((root_node = ulist_next(roots, root_uiter))) {

 It doesn't look unnecessary at all to me. ret is set in the loop and
 only checked in the while condition.

 pr_debug(root %llu references leaf %llu, data list 
  %#llx\n, root_node-val, ref_node-val,
  (long long)ref_node-aux);
 @@ -1510,7 +1510,6 @@ int iterate_extent_inodes(struct btrfs_fs_info 
 *fs_info,
 iterate, ctx);
 }
 ulist_free(roots);
 -   roots = NULL;

 roots gets freed again later on. If you don't set it to NULL, it will
 result in a double free.
 
 Maybe you mean this?
 
 http://marc.info/?l=linux-btrfs&m=136456233929528&w=2
 ulist_free() here is unnecessary and may cause a double free…
 So we don't need to set it to NULL again..

Yeah, I haven't seen your other patch.

 
 
 

 -Arne

 }

 free_leaf_list(refs);


 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V2 1/6] Btrfs: introduce a mutex lock for btrfs quota operations

2013-03-30 Thread Arne Jansen
On 03/28/13 11:53, Wang Shilong wrote:
 From: Wang Shilong wangsl-f...@cn.fujitsu.com
 
 This patch introduces mutex lock 'quota_lock', and makes
 all the user change for quota protected by quota_lock.
 
 Signed-off-by: Wang Shilong wangsl-f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/ctree.h   |3 +++
  fs/btrfs/disk-io.c |1 +
  fs/btrfs/ioctl.c   |   16 
  3 files changed, 16 insertions(+), 4 deletions(-)
 
 diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
 index 6e81860..a11a8ed 100644
 --- a/fs/btrfs/ctree.h
 +++ b/fs/btrfs/ctree.h
 @@ -1584,6 +1584,9 @@ struct btrfs_fs_info {
   struct rb_root qgroup_tree;
   spinlock_t qgroup_lock;
  
 + /* protect user change operations for quota */
 + struct mutex quota_lock;
 +
   /* list of dirty qgroups to be written at next commit */
   struct list_head dirty_qgroups;
  
 diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
 index fe82d08..4552f14 100644
 --- a/fs/btrfs/disk-io.c
 +++ b/fs/btrfs/disk-io.c
 @@ -2250,6 +2250,7 @@ int open_ctree(struct super_block *sb,
   mutex_init(fs_info-dev_replace.lock);
  
   spin_lock_init(fs_info-qgroup_lock);
 + mutex_init(fs_info-quota_lock);
   fs_info-qgroup_tree = RB_ROOT;
   INIT_LIST_HEAD(fs_info-dirty_qgroups);
   fs_info-qgroup_seq = 1;
 diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
 index 222ce84..e2950f1 100644
 --- a/fs/btrfs/ioctl.c
 +++ b/fs/btrfs/ioctl.c
 @@ -752,7 +752,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
  
   if (btrfs_root_refs(BTRFS_I(dir)-root-root_item) == 0)
   goto out_up_read;
 -
 + mutex_lock(BTRFS_I(dir)-root-fs_info-quota_lock);
   if (snap_src) {
   error = create_snapshot(snap_src, dir, dentry, name, namelen,
   async_transid, readonly, inherit);
 @@ -762,6 +762,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
   }
   if (!error)
   fsnotify_mkdir(dir, dentry);
 + mutex_unlock(BTRFS_I(dir)-root-fs_info-quota_lock);

You are completely serializing subvolume operations here. I'd prefer if
you'd move the lock to a lower level to only protect the quota
operations. Can't you move the lock completely to qgroup.c?

  out_up_read:
   up_read(BTRFS_I(dir)-root-fs_info-subvol_sem);
  out_dput:
 @@ -3693,6 +3694,7 @@ static long btrfs_ioctl_quota_ctl(struct file *file, 
 void __user *arg)
   goto drop_write;
   }
  
 + mutex_lock(root-fs_info-quota_lock);
   down_read(root-fs_info-subvol_sem);
   if (sa-cmd != BTRFS_QUOTA_CTL_RESCAN) {
   trans = btrfs_start_transaction(root, 2);
 @@ -3728,6 +3730,7 @@ static long btrfs_ioctl_quota_ctl(struct file *file, 
 void __user *arg)
  out:
   kfree(sa);
   up_read(root-fs_info-subvol_sem);
 + mutex_unlock(root-fs_info-quota_lock);
  drop_write:
   mnt_drop_write_file(file);
   return ret;
 @@ -3754,6 +3757,7 @@ static long btrfs_ioctl_qgroup_assign(struct file 
 *file, void __user *arg)
   goto drop_write;
   }
  
 + mutex_lock(root-fs_info-quota_lock);
   trans = btrfs_join_transaction(root);
   if (IS_ERR(trans)) {
   ret = PTR_ERR(trans);
 @@ -3775,6 +3779,7 @@ static long btrfs_ioctl_qgroup_assign(struct file 
 *file, void __user *arg)
  
  out:
   kfree(sa);
 + mutex_unlock(root-fs_info-quota_lock);
  drop_write:
   mnt_drop_write_file(file);
   return ret;
 @@ -3805,11 +3810,11 @@ static long btrfs_ioctl_qgroup_create(struct file 
 *file, void __user *arg)
   ret = -EINVAL;
   goto out;
   }
 -
 + mutex_lock(root-fs_info-quota_lock);
   trans = btrfs_join_transaction(root);
   if (IS_ERR(trans)) {
   ret = PTR_ERR(trans);
 - goto out;
 + goto out_unlock;
   }
  
   /* FIXME: check if the IDs really exist */
 @@ -3824,6 +3829,8 @@ static long btrfs_ioctl_qgroup_create(struct file 
 *file, void __user *arg)
   if (err  !ret)
   ret = err;
  
 +out_unlock:
 + mutex_unlock(root-fs_info-quota_lock);
  out:
   kfree(sa);
  drop_write:
 @@ -3852,7 +3859,7 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, 
 void __user *arg)
   ret = PTR_ERR(sa);
   goto drop_write;
   }
 -
 + mutex_lock(root-fs_info-quota_lock);
   trans = btrfs_join_transaction(root);
   if (IS_ERR(trans)) {
   ret = PTR_ERR(trans);
 @@ -3874,6 +3881,7 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, 
 void __user *arg)
  
  out:
   kfree(sa);
 + mutex_unlock(root-fs_info-quota_lock);
  drop_write:
   mnt_drop_write_file(file);
   return ret;
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V2 2/6] Btrfs: remove some unnecessary spin_lock usages

2013-03-30 Thread Arne Jansen
On 03/28/13 11:54, Wang Shilong wrote:
 From: Wang Shilong wangsl-f...@cn.fujitsu.com
 
 We use mutex_lock to protect all the user change operations.
 So when we are calling find_qgroup_rb() to check whether
 qgroup exists, we don't have to hold spin_lock.
 
 Besides, when enabling/disabling quota, it must be single
 threaded when operations come to here. Spin_lock must be firstly
 used to clear quota_root when disabling quota, while enabling
 quota spin_lock must be used to complete the last assign work.
 
 Signed-off-by: Wang Shilong wangsl-f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/qgroup.c |   42 +++---
  1 files changed, 15 insertions(+), 27 deletions(-)
 
 diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
 index e3598fa..7df372a 100644
 --- a/fs/btrfs/qgroup.c
 +++ b/fs/btrfs/qgroup.c
 @@ -42,7 +42,6 @@
   *  - limit
   *  - caches fuer ulists
   *  - performance benchmarks
 - *  - check all ioctl parameters
   */
  
  /*
 @@ -98,7 +97,11 @@ struct btrfs_qgroup_list {
   struct btrfs_qgroup *member;
  };
  
 -/* must be called with qgroup_lock held */

instead it must be called with quota_lock held. The rest looks
correct to me.

-Arne

 +/*
 + * don't need to be held by spin_lock since
 + * all the quota configurations on memory has been protected
 + * by mutex quota_lock.
 + */


  static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
  u64 qgroupid)
  {
 @@ -793,13 +796,10 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
   int ret = 0;
   int slot;
  
 - spin_lock(fs_info-qgroup_lock);
   if (fs_info-quota_root) {
   fs_info-pending_quota_state = 1;
 - spin_unlock(fs_info-qgroup_lock);
 - goto out;
 + return ret;
   }
 - spin_unlock(fs_info-qgroup_lock);
  
   /*
* initially create the quota tree
 @@ -808,7 +808,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
  BTRFS_QUOTA_TREE_OBJECTID);
   if (IS_ERR(quota_root)) {
   ret =  PTR_ERR(quota_root);
 - goto out;
 + return ret;
   }
  
   path = btrfs_alloc_path();
 @@ -861,14 +861,11 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
   if (ret)
   goto out_free_path;
  
 - spin_lock(fs_info-qgroup_lock);
   qgroup = add_qgroup_rb(fs_info, found_key.offset);
   if (IS_ERR(qgroup)) {
 - spin_unlock(fs_info-qgroup_lock);
   ret = PTR_ERR(qgroup);
   goto out_free_path;
   }
 - spin_unlock(fs_info-qgroup_lock);
   }
   ret = btrfs_next_item(tree_root, path);
   if (ret  0)
 @@ -883,13 +880,12 @@ out_add_root:
   if (ret)
   goto out_free_path;
  
 - spin_lock(fs_info-qgroup_lock);
   qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
   if (IS_ERR(qgroup)) {
 - spin_unlock(fs_info-qgroup_lock);
   ret = PTR_ERR(qgroup);
   goto out_free_path;
   }
 + spin_lock(fs_info-qgroup_lock);
   fs_info-quota_root = quota_root;
   fs_info-pending_quota_state = 1;
   spin_unlock(fs_info-qgroup_lock);
 @@ -901,7 +897,6 @@ out_free_root:
   free_extent_buffer(quota_root-commit_root);
   kfree(quota_root);
   }
 -out:
   return ret;
  }
  
 @@ -912,11 +907,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle 
 *trans,
   struct btrfs_root *quota_root;
   int ret = 0;
  
 - spin_lock(fs_info-qgroup_lock);
 - if (!fs_info-quota_root) {
 - spin_unlock(fs_info-qgroup_lock);
 + if (!fs_info-quota_root)
   return 0;
 - }
 +
 + spin_lock(fs_info-qgroup_lock);
   fs_info-quota_enabled = 0;
   fs_info-pending_quota_state = 0;
   quota_root = fs_info-quota_root;
 @@ -1041,15 +1035,12 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle 
 *trans,
   return -EINVAL;
  
   /* check if there are no relations to this qgroup */
 - spin_lock(fs_info-qgroup_lock);
   qgroup = find_qgroup_rb(fs_info, qgroupid);
   if (qgroup) {
 - if (!list_empty(qgroup-groups) || 
 !list_empty(qgroup-members)) {
 - spin_unlock(fs_info-qgroup_lock);
 + if (!list_empty(qgroup-groups) ||
 + !list_empty(qgroup-members))
   return -EBUSY;
 - }
   }
 - spin_unlock(fs_info-qgroup_lock);
  
   ret = del_qgroup_item(trans, quota_root, qgroupid);
  
 @@ -1081,20 +1072,17 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle 
 *trans,
  (unsigned long long)qgroupid);
   }
  
 - 

Re: Adding a non-empty subvol to a qgroup

2013-03-22 Thread Arne Jansen
On 22.03.2013 13:03, Wang Shilong wrote:
 Hello Arne,
 
 Since quota rescan has not been implemented yet,
 
 overflow can happen, so until now, we can have a check when
 doing accounting in the kernel, if the referenced/exclusive is not
 enough to delete, we just make it to be 0 and give a warning.
 
 Otherwise, the user may get a strange integer (because of type u64).
 What do you think? Or should we just wait for the implementation of rescan?

I think we already print it negatively. Please just leave it as
it is.

Thanks,
Arne

 
 Thanks,
 Wang
 
 All,

 When adding a subvolume to a qgroup, pre-existing files in that subvolume 
 are not counted in the referenced/exclusive space of the qgroup. Is this 
 intended behavior ?

 I create a subvol with one file:

  # mkfs.btrfs /dev/sdg
  # mount /dev/sdg /mnt/fulldisk
  # cd /mnt/fulldisk
  # btrfs quota enable ./
  # btrfs sub create sub1
  # dd if=/dev/zero of=sub1/file1 bs=10 count=1
  # sync
  # btrfs qgroup show ./
  0/257 106496 106496

 Now I create a new qgroup on level 1 and add the qgroup of sub1 to it :

  # btrfs qgroup create 1/0 ./
  # btrfs qgroup assign 0/257 1/0 ./
  # sync
  # btrfs fi sync ./
  # btrfs quota rescan ./
  # btrfs quota rescan ./sub1
  # btrfs qgroup show ./
  0/257 106496 106496
  1/0 0 0

 The pre-existing file does not contribute to the space numbers.

 Let's create a new file:

  # dd if=/dev/zero of=sub1/file2 bs=5 count=1
  # sync
  # btrfs qgroup show ./
  0/257 159744 159744
  1/0 53248 53248

 We see that only the new file is included in the space numbers.

 Now I remove the first file:

  # rm -f sub1/file1
  # sync
  # btrfs qgroup show ./
  0/257 57344 57344
  1/0 -49152 -49152

 The space numbers go below zero. Even if the behavior above is intended, the 
 removal of the pre-existing file should not result in negative space numbers.

 Koen.
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: about btrfs quota issues

2013-03-11 Thread Arne Jansen
On 10.03.2013 05:21, Shilong Wang wrote:
 Hello, Arne
 
   Steps to reproduce:
 
 
 mkfs.btrfs disk
 mount disk mnt
 btrfs quota enable mnt
 
 btrfs sub create mnt/sub
 btrfs qgroup create 1/1 mnt
 btrfs qgroup assign sub_qgroupid 1/1 mnt
 
 
 dd if=/dev/zero of=mnt/sub/data bs=1M count=1
 sync
 btrfs qgroup show  mnt
 #until now, every thing goes well, however, if snapshot 
 happens
 #the quota accounting will go wrong
 
btrfs sub snapshot mnt/sub mnt/snap
sync
btrfs qgroup show mnt
#the accounting information of group(1/1) is not expected
#here exclusive of group (1/1) do not change as expected.
 
 So i took a close look at the algorithm of quota accounting, the 3
 steps of algorithm don't
 consider some cases like the above example.
 
 In fact, i think you try to put some work on users, especially when
 snapshot happens.
 It is complex to track all the group's accounting when having
 snapshots..See the following
 commands.
 
 btrfs sub snapshot -c src_qgroupid:dst_qgroupid  mnt
 btrfs sub snapshot  -x src_qgroupid:dst_qgroupid mnt
 
 
 Are these commands designed for some cases regarding to
 snapshots/subvolume cases?

Yes, these commands would have helped you in the above case. You need to
create an empty qgroup and copy the exclusive from there on snapshot
creation.

 If so, i think it really confusing and too complex for users to do
 such work, isn't it?...

It is complex. That is why I always point anyone asking to do some work
on btrfs or qgroups to writing an enhanced interface to simplify this
task for the user. I don't think the kernel should handle this.
And that's why I took the effort to write a pdf to explain the
concepts :)
But the current interface is not only complex, it also is very powerful.
You can solve problems with it that no other quota system I know of can
solve.

 
 BTW, i have a question about the function btrfs_qgroup_inherit(),
 when copying exclusive value from src_qgroup to dst_qgroup:
 
dst_qgroup-exclusive = src_qgroup-exclusive + level_size
 
 while copying referenced value from src_qgroup to dot_qgroup:
 
dst_qgroup-referenced = src_qgroup-referenced -level_size
 
 I can't really figure out...~_~

level_size is just a small correction for the space the tree root
occupies. The tree root is never shared between subvolumes.

-Arne

 
 Thanks,
 Wang

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: about btrfs quota issues

2013-03-11 Thread Arne Jansen
On 11.03.2013 14:31, Wang Shilong wrote:
 
 Hello,
 
snip


 In fact, i think you try to put some work on users, especially when
 snapshot happens.
 It is complex to track all the group's accounting when having
 snapshots..See the following
 commands.

 btrfs sub snapshot -c src_qgroupid:dst_qgroupid  mnt
 btrfs sub snapshot  -x src_qgroupid:dst_qgroupid mnt


 Are these commands designed for some cases regarding to
 snapshots/subvolume cases?

 Yes, these commands would have helped you in the above case. You need to
 create an empty qgroup and copy the exclusive from there on snapshot
 creation.
 
 I am wondering why we need the concept of exclusive.
 Maybe it helps to some extent
 

It is needed to answer the question 'how much space can I gain by
deleting this subvol or this set of subvolumes?'

 How about just  kicking it off, since the concepts of exclusive
 adds the complexity of btrfs quota.

If you don't need that value, just ignore the tracking error.

 
 The worst thing is that i don't think users can master this magic
 concept very well.

Normally users don't need very sophisticated scenarios. In fact, they
don't even need higher level quota groups, the basic tracking is
enough. In this case, everything just works as expected for the user.
If you start creating and assigning qgroups manually, prepare to handle
the complexity.

-Arne

 

 If so, i think it really confusing and too complex for users to do
 such work, isn't it?...

 It is complex. That is why I always point anyone asking to do some work
 on btrfs or qgroups to writing an enhanced interface to simplify this
 task for the user. I don't think the kernel should handle this.
 And that's why I took the effort to write a pdf to explain the
 concepts :)
 
 I don't have any  good ideas about this yet..
 
 But the current interface is not only complex, it also is very powerful.
 You can solve problems with it that no other quota system I know of can
 solve.


 BTW, i have a question about the function btrfs_qgroup_inherit(),
 when copying exclusive value from src_qgroup to dst_qgroup:

   dst_qgroup-exclusive = src_qgroup-exclusive + level_size

 while copying referenced value from src_qgroup to dot_qgroup:

   dst_qgroup-referenced = src_qgroup-referenced -level_size

 I can't really figure out...~_~

 level_size is just a small correction for the space the tree root
 occupies. The tree root is never shared between sub volumes.
 
 O.K. I  got it..
 
 Thanks,
 Wang
 

 -Arne


 Thanks,
 Wang

 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: about btrfs quota issues

2013-03-11 Thread Arne Jansen
On 11.03.2013 15:15, Wang Shilong wrote:
 
 snip
 
 The worst thing is that i don't think users can master this magic
 concept very well.

 Normally users don't need very sophisticated scenarios. In fact, they
 don't even need higher level quota groups, the basic tracking is
 enough. In this case, everything just works as expected for the user.
 If you start creating and assigning qgroups manually, prepare to handle
 the complexity.

 Considering this case:
 
 a subvolume related to a user, we limit the space by limiting every subvolume
 qgroup, but  we also want to limit  the total space all the users can use. So 
 we create
 a parent qgroup(1/1 for example) and assign all subvolume group to this 
 parent group.
 
 The above case is regularly used i think, What's more, many snapshots may be 
 done.
 So  i think what i am concerning is not a corner case..

So you just forgot to assign the new subvolume to 1/1 by using -i on
snapshot creation.

-Arne

 
 Thanks,
 Wang



 If so, i think it really confusing and too complex for users to do
 such work, isn't it?...

 It is complex. That is why I always point anyone asking to do some work
 on btrfs or qgroups to writing an enhanced interface to simplify this
 task for the user. I don't think the kernel should handle this.
 And that's why I took the effort to write a pdf to explain the
 concepts :)

 I don't have any  good ideas about this yet..

 But the current interface is not only complex, it also is very powerful.
 You can solve problems with it that no other quota system I know of can
 solve.


 BTW, i have a question about the function btrfs_qgroup_inherit(),
 when copying exclusive value from src_qgroup to dst_qgroup:

  dst_qgroup-exclusive = src_qgroup-exclusive + level_size

 while copying referenced value from src_qgroup to dot_qgroup:

  dst_qgroup-referenced = src_qgroup-referenced -level_size

 I can't really figure out...~_~

 level_size is just a small correction for the space the tree root
 occupies. The tree root is never shared between sub volumes.

 O.K. I  got it..

 Thanks,
 Wang


 -Arne


 Thanks,
 Wang



 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: about btrfs quota issues

2013-03-11 Thread Arne Jansen
On 11.03.2013 15:35, Wang Shilong wrote:
 
 On 11.03.2013 15:15, Wang Shilong wrote:

 snip

 The worst thing is that i don't think users can master this magic
 concept very well.

 Normally users don't need very sophisticated scenarios. In fact, they
 don't even need higher level quota groups, the basic tracking is
 enough. In this case, everything just works as expected for the user.
 If you start creating and assigning qgroups manually, prepare to handle
 the complexity.

 Considering this case:

 a subvolume related to a user, we limit the space by limiting every 
 subvolume
 qgroup, but  we also want to limit  the total space all the users can use. 
 So we create
 a parent qgroup(1/1 for example) and assign all subvolume group to this 
 parent group.

 The above case is regularly used i think, What's more, many snapshots may 
 be done.
 So  i think what i am concerning is not a corner case..

 So you just missed to assign the new subvolume to 1/1 by using -i on
 snapshot creation.

 
 When snapshot happens,  the exclusive of 1/1 will go wrong even with  this 
 simple case..

Your example does not describe your use case. If you want to account the
snapshot to the user, you also have to assign the snapshot to 1/1. If you
do so, the exclusive will be correct.

-Arne

 
 However, thanks very much for your patience and kindly reply ^_^
 
 Thanks, 
 Wang
 
 -Arne


 Thanks,
 Wang



 If so, i think it really confusing and too complex for users to do
 such work, isn't it?...

 It is complex. That is why I always point anyone asking to do some work
 on btrfs or qgroups to writing an enhanced interface to simplify this
 task for the user. I don't think the kernel should handle this.
 And that's why I took the effort to write a pdf to explain the
 concepts :)

 I don't have any  good ideas about this yet..

 But the current interface is not only complex, it also is very powerful.
 You can solve problems with it that no other quota system I know of can
 solve.


 BTW, i have a question about the function btrfs_qgroup_inherit(),
 when copying exclusive value from src_qgroup to dst_qgroup:

 dst_qgroup-exclusive = src_qgroup-exclusive + level_size

 while copying referenced value from src_qgroup to dot_qgroup:

 dst_qgroup-referenced = src_qgroup-referenced -level_size

 I can't really figure out...~_~

 level_size is just a small correction for the space the tree root
 occupies. The tree root is never shared between sub volumes.

 O.K. I  got it..

 Thanks,
 Wang


 -Arne


 Thanks,
 Wang





 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Heavy memory leak when using quota groups

2013-02-27 Thread Arne Jansen
On 02/15/13 05:34, shyam btrfs wrote:
 Hi Arne,
 
 Yes this addresses the crash during mount. Thanks.
 
 But still I have the consistent out-of-memory issue when running
 random 4K writes with qgroup enabled. I will see if I can get some
 steps for you to recreate the problem.

Can you please try to reproduce this with 3.6? If it doesn't happen
there, could you try to git bisect it?

Thanks,
Arne

 
 --Shyam
 
 On Wed, Feb 13, 2013 at 4:52 PM, Arne Jansen sensi...@gmx.net wrote:
 Hi Shyam,

 I sent a patch to the list

 [PATCH] Btrfs: fix crash in log replay with qgroups enabled

 that hopefully addresses this problem. As I haven't been able to
 reproduce it, I also haven't been able to test it.
 Could you please see if it fixes your problem and makes the filesystem
 mountable again?

 Thanks,
 Arne

 On 12.02.2013 08:31, shyam btrfs wrote:
 Also immediately after this problem, its impossible to mount the
 filesystem. it consistently fails with

 [ 2092.254428] BUG: unable to handle kernel NULL pointer dereference
 at 03c4
 [ 2092.255945] IP: [a033d0be]
 btrfs_search_old_slot+0x63e/0x940 [btrfs]
 [ 2092.257340] PGD 23d42067 PUD 3a93a067 PMD 0
 [ 2092.257982] Oops:  [#1] SMP
 [ 2092.257982] Modules linked in: raid1 xt_multiport xt_tcpudp
 nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack iptable_filter
 ip_tables x_tables iscsi_tcp libiscsi_tcp libiscsi
 scsi_transport_iscsi xfrm_user xfrm4_tunnel tunnel4 ipcomp xfrm_ipcomp
 esp4 ah4 8021q garp stp llc bonding btrfs(OF) deflate zlib_deflate ctr
 twofish_generic twofish_x86_64_3way twofish_x86_64 twofish_common
 camellia_generic camellia_x86_64 serpent_sse2_x86_64 glue_helper lrw
 serpent_generic xts gf128mul blowfish_generic blowfish_x86_64
 blowfish_common ablk_helper cryptd cast5_generic cast_common
 des_generic xcbc rmd160 crypto_null af_key xfrm_algo scst_vdisk(OF)
 iscsi_scst(OF) scst(OF) libcrc32c microcode nfsv4 psmouse nfsd(OF)
 virtio_balloon nfs_acl serio_raw auth_rpcgss nfs fscache lockd sunrpc
 lp parport floppy ixgbevf
 [ 2092.257982] CPU 0
 [ 2092.257982] Pid: 27156, comm: mount Tainted: GF  O
 3.8.0-030800rc5-generic #201301251535 Bochs Bochs
 [ 2092.257982] RIP: 0010:[a033d0be]  [a033d0be]
 btrfs_search_old_slot+0x63e/0x940 [btrfs]
 [ 2092.257982] RSP: 0018:88003752f598  EFLAGS: 00010206
 [ 2092.257982] RAX:  RBX: 0001 RCX: 
 880017826560
 [ 2092.257982] RDX: 0f83e0f83e0f83e1 RSI: 0066 RDI: 
 8800374bda00
 [ 2092.257982] RBP: 88003752f628 R08: 880019dfc000 R09: 
 88003752f508
 [ 2092.257982] R10: 000c R11:  R12: 
 880018d60800
 [ 2092.257982] R13: 88001c3bd900 R14: 88001c3ce158 R15: 
 8800
 [ 2092.257982] FS:  7fdc62688800() GS:88003fc0()
 knlGS:
 [ 2092.257982] CS:  0010 DS:  ES:  CR0: 8005003b
 [ 2092.257982] CR2: 03c4 CR3: 3a91a000 CR4: 
 06f0
 [ 2092.257982] DR0:  DR1:  DR2: 
 
 [ 2092.257982] DR3:  DR6: 0ff0 DR7: 
 0400
 [ 2092.257982] Process mount (pid: 27156, threadinfo 88003752e000,
 task 880018ea5d00)
 [ 2092.257982] Stack:
 [ 2092.257982]  88003752f5c8 88003d554480 880017826560
 880019dfc000
 [ 2092.257982]  18d60800  
 880018729498
 [ 2092.257982]  00dc 0001 88001c3ce158
 1c3bd900
 [ 2092.257982] Call Trace:
 [ 2092.257982]  [a03b23f3] __resolve_indirect_refs+0x173/0x620 
 [btrfs]
 [ 2092.257982]  [a037aa17] ? free_extent_buffer+0x37/0x90 [btrfs]
 [ 2092.257982]  [a03b316a] find_parent_nodes+0x7da/0xf90 [btrfs]
 [ 2092.257982]  [a03b39b9] btrfs_find_all_roots+0x99/0x100 [btrfs]
 [ 2092.257982]  [81183beb] ? kfree+0x3b/0x150
 [ 2092.257982]  [a03b691b] btrfs_qgroup_account_ref+0xfb/0x550 
 [btrfs]
 [ 2092.257982]  [a0346088] ?
 btrfs_delayed_refs_qgroup_accounting+0x58/0x100 [btrfs]
 [ 2092.257982]  [81183cc4] ? kfree+0x114/0x150
 [ 2092.257982]  [a03460d3]
 btrfs_delayed_refs_qgroup_accounting+0xa3/0x100 [btrfs]
 [ 2092.257982]  [a034d269] btrfs_run_delayed_refs+0x49/0x2f0 
 [btrfs]
 [ 2092.257982]  [a0373f43] ?
 btrfs_run_ordered_operations+0x2b3/0x2e0 [btrfs]
 [ 2092.257982]  [a035ce25] btrfs_commit_transaction+0x85/0xad0 
 [btrfs]
 [ 2092.257982]  [a033c5de] ? btrfs_search_slot+0x2fe/0x7a0 [btrfs]
 [ 2092.257982]  [8107fc70] ? add_wait_queue+0x60/0x60
 [ 2092.257982]  [81183d42] ? kmem_cache_free+0x42/0x160
 [ 2092.257982]  [a03754c1] ?
 release_extent_buffer.isra.26+0x81/0xf0 [btrfs]
 [ 2092.257982]  [a0396aa5] btrfs_recover_log_trees+0x335/0x3b0 
 [btrfs]
 [ 2092.257982]  [a03953d0] ?
 fixup_inode_link_counts+0x150/0x150 [btrfs]
 [ 2092.257982

Re: [PATCH 1/2] Btrfs: create the qgroup that limits root subvolume automatically

2013-02-22 Thread Arne Jansen
On 02/22/13 13:02, Wang Shilong wrote:
 From: Wang Shilong wangsl-f...@cn.fujitsu.com
 
 Creating the root subvolume qgroup when enabling quota,with

Why only create a qgroup for the root subvolume and not for
every existing subvolume?

 this patch,it will be ok to limit the whole filesystem size.

This will not limit the whole filesystem, but only the root
subvolume. To limit the whole filesystem you'd have to create
a level 1 qgroup and add all subvolumes to it.

-Arne

 
 Signed-off-by: Wang Shilong wangsl-f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 Cc: Arne Jansen sensi...@gmx.net
 ---
  fs/btrfs/qgroup.c |   12 
  1 files changed, 12 insertions(+), 0 deletions(-)
 
 diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
 index a5c8562..c409096 100644
 --- a/fs/btrfs/qgroup.c
 +++ b/fs/btrfs/qgroup.c
 @@ -777,6 +777,7 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
   struct extent_buffer *leaf;
   struct btrfs_key key;
   int ret = 0;
 + struct btrfs_qgroup *qgroup = NULL;
  
   spin_lock(fs_info-qgroup_lock);
   if (fs_info-quota_root) {
 @@ -823,7 +824,18 @@ int btrfs_quota_enable(struct btrfs_trans_handle *trans,
  
   btrfs_mark_buffer_dirty(leaf);
  
 + btrfs_release_path(path);
 + ret = add_qgroup_item(trans, quota_root, BTRFS_FS_TREE_OBJECTID);
 + if (ret)
 + goto out;
 +
   spin_lock(fs_info-qgroup_lock);
 + qgroup = add_qgroup_rb(fs_info, BTRFS_FS_TREE_OBJECTID);
 + if (IS_ERR(qgroup)) {
 + spin_unlock(fs_info-qgroup_lock);
 + ret = PTR_ERR(qgroup);
 + goto out;
 + }
   fs_info-quota_root = quota_root;
   fs_info-pending_quota_state = 1;
   spin_unlock(fs_info-qgroup_lock);
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RESEND RFC PATCH 2/2] Btrfs: disable the qgroup level 0 for userspace use

2013-02-22 Thread Arne Jansen
On 02/22/13 13:09, Wang Shilong wrote:
 From: Wang Shilong wangsl-f...@cn.fujitsu.com
 
 This patch tries to stop users to create/destroy qgroup level 0,
 users can only create/destroy qgroup level more than 0.
 
 See the fact:
   a subvolume/snapshot qgroup was created automatically
 when creating subvolume/snapshot, so creating a qgroup level 0 can't
 be a subvolume/snapshot qgroup, the only way to use it is that assigning
 subvolume/snapshot qgroup to it, the point is that we don't want to have a
 parent qgroup whose level is 0.
 
   So we want to force users to use qgroup with clear relations
 which means a parent qgroup's level > child qgroup's level. For example:
 
              2/0
             /   \
            /     \
           /       \
         1/0       1/1
        /   \         \
       /     \         \
      /       \         \
   0/256     0/257     0/258
 
 This pattern of quota is nature and easy for users to understand, otherwise 
 it will
 make the quota configuration confusing and difficult to maintain.

I agree that a strict hierarchy of the levels should be enforced.
Currently the kernel has no idea of 'level', it's just an artificial
concept that lives in userspace. This patch would be the first place
to add that magic shift '48' to the kernel.
In my opinion it would be sufficient to do the enforcement in user
space, as it is of no technical nature.

-Arne

 
 Signed-off-by: Wang Shilong wangsl-f...@cn.fujitsu.com
 Acked-by: Miao Xie mi...@cn.fujitsu.com
 Cc: Arne Jansen sensi...@gmx.net
 ---
  fs/btrfs/ioctl.c |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)
 
 diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
 index a31cd93..3590c21 100644
 --- a/fs/btrfs/ioctl.c
 +++ b/fs/btrfs/ioctl.c
 @@ -3755,7 +3755,7 @@ static long btrfs_ioctl_qgroup_create(struct file 
 *file, void __user *arg)
   goto drop_write;
   }
  
 - if (!sa->qgroupid) {
 + if (!(sa->qgroupid >> 48)) {
   ret = -EINVAL;
   goto out;
   }
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Tests] xfs test[299]:Btrfs hierarchical Quotas

2013-02-21 Thread Arne Jansen
On 02/22/13 07:12, Hemanth Kumar wrote:
 
 Signed-off-by: Hemanth Kumar hemanthkuma...@gmail.com
 ---
  299 | 38 ++
  299.out | 20 
  2 files changed, 58 insertions(+)
  create mode 100644 299
  create mode 100644 299.out
 
 diff --git a/299 b/299
 new file mode 100644
 index 000..6b03438
 --- /dev/null
 +++ b/299
 @@ -0,0 +1,38 @@
 +#! /bin/bash
 +# FS QA Test No. 299
 +#
 +# Test btrfs's hierarchical quotas
 +#
 +#--
 +#
 +# creator
 +owner=hemanthkuma...@gmail.com
 +
 +seq=`basename $0`
 +echo QA output created by $seq
 +
 +here=`pwd`
 +tmp=/tmp/$$
 +status=1# failure is the default!
 +
 +_cleanup()
 +{
 +rm -rf $tmp.*
 +}
 +
 +trap "_cleanup ; exit \$status" 0 1 2 3 15
 +
 +#Enabeling btrfs qutas
 +btrfs quota enable $TEST_DIR
 +echo quota enabled on $TEST_DEV
 +btrfs subvolume create $TEST_DIR/vol1
 +btrfs subvolume create $TEST_DIR/vol1/vol2
 +btrfs subvolume create $TEST_DIR/vol1/vol2/vol3
 +btrfs qgroup limit 5m $TEST_DIR/vol1
 +btrfs qgroup limit 3m $TEST_DIR/vol1/vol2
 +btrfs qgroup limit 2m $TEST_DIR/vol1/vol2/vol3
 +dd if=$TEST_DEV of=$TEST_DIR/vol1/vol2/vol3/file1 bs=3M count=1
 +dd if=$TEST_DEV of=$TEST_DIR/vol1/vol2/file1 bs=2M count=1
 +dd if=$TEST_DEV of=$TEST_DIR/vol1/file1 bs=5M count=1

It is not really clear to me what you are trying to test.
This does not really test hierarchical quota, but just 3
independent quotas. To make them hierarchical, you have
to create higher level qgroups.

 +btrfs qgroup show $TEST_DIR
 +exit
 diff --git a/299.out b/299.out
 new file mode 100644
 index 000..f9a6b96
 --- /dev/null
 +++ b/299.out
 @@ -0,0 +1,20 @@
 +QA output created by 299
 +quota enabled on /dev/sdc5
 +Create subvolume '/test/vol1'
 +Create subvolume '/test/vol1/vol2'
 +Create subvolume '/test/vol1/vol2/vol3'
 +dd: writing ‘/test/vol1/vol2/vol3/file1’: Disk quota exceeded
 +1+0 records in
 +0+0 records out
 +2031616 bytes (2.0 MB) copied, 0.128843 s, 15.8 MB/s
 +1+0 records in
 +1+0 records out
 +2097152 bytes (2.1 MB) copied, 0.00884457 s, 237 MB/s
 +dd: writing ‘/test/vol1/file1’: Disk quota exceeded
 +1+0 records in
 +0+0 records out
 +5177344 bytes (5.2 MB) copied, 0.0732531 s, 70.7 MB/s
 +0/257 4096 4096
 +0/258 4096 4096
 +0/259 4096 4096

This is probably not the expected output, as the written data
did not show up yet. You should add a btrfs fi sync before the qgroup
show command to force the delayed writes to disk. Otherwise they're
not accounted for yet.

-Arne

 +
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Kernel panic when scrub is used

2013-02-18 Thread Arne Jansen
On 02/18/13 18:14, Jérôme Poulin wrote:
 I experience a kernel panic with General protection fault when doing
 a scrub on Kernel 3.8-rc7.
 
 Here is a screenshot: http://tinypic.com/r/34r6nad/6

I'd love to see the first stacktrace...

 
 The weird part is that the scrub completes from initramfs, but when
 system is fully booted, is kernel panics every time in the low
 percentage. (10%)
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Kernel panic when scrub is used

2013-02-18 Thread Arne Jansen
On 02/18/13 18:53, Jérôme Poulin wrote:
 Here you go, I also added 2 other screenshots of the same problem.
 http://tinypic.com/r/5ckgug/6
 http://tinypic.com/r/t0i9t4/6
 http://tinypic.com/r/2r3xdvl/6

do you have any idea how I can reproduce it here?

-Arne

 
 On Mon, Feb 18, 2013 at 12:37 PM, Arne Jansen li...@die-jansens.de wrote:
 On 02/18/13 18:14, Jérôme Poulin wrote:
 I experience a kernel panic with General protection fault when doing
 a scrub on Kernel 3.8-rc7.

 Here is a screenshot: http://tinypic.com/r/34r6nad/6

 I'd love to see the first stacktrace...


 The weird part is that the scrub completes from initramfs, but when
 system is fully booted, is kernel panics every time in the low
 percentage. (10%)
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html



--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Heavy memory leak when using quota groups

2013-02-13 Thread Arne Jansen
On 12.02.2013 08:25, shyam btrfs wrote:
 Hi Arne, Jan,
 
 I am using btrfs directly from Linux 3.8rc5 (commit
 949db153b6466c6f7cad5a427ecea94985927311). I am trying to use qgroups
 functionality & with a basic random-write workload, it constantly
 keeps leaking memory & within few minutes of IO, there is either
 out-of-memory killer trying to kill some tasks or there are
 page-allocation failures that btrfs or other kernel module
 experiences.
 
 This issue is consistently reproducible & to do that this is what I do:
 # mkfs.btrfs on a blockdevice
 # mount with rw,noatime,nodatasum,nodatacow,nospace_cache  options
 # btrfs quota enable /mntpoint
 # btrfs qgroup create 100 /mntpoint
 # I setup a subvolume under the mountpoint & fallocate a file of fixed size.
 # Perform 4K write random IO with 4 worker threads with the
 application opening with O_SYNC flag (i.e. there is a fsync() after
 each write). To do this I am exposing the fallocate'd file as a SCST
 iSCSI target lun & doing IO's, but I presume this can be easily
 reproduced with any other IO generator.

Unfortunately I haven't been able to reproduce it yet (using stress.sh),
but I keep trying. Are you sure it is really leaking memory or just needing
more than is available? If you abort the test in the middle, can you reclaim
all your memory by unmounting?

Thanks,
Arne

 
 I confirmed that if I dont do quota enable/qgroup create, I don't run
 into this issue. So there is somewhere a memory leak with quota-group.
 
 Below is one stack trace where qgroup accounting code runs into
 allocation failure.
 
 [ 5123.800178] btrfs-endio-wri: page allocation failure: order:0, mode:0x20
 [ 5123.800188] Pid: 27508, comm: btrfs-endio-wri Tainted: GF
 O 3.8.0-030800rc5-generic #201301251535
 [ 5123.800190] Call Trace:
 [ 5123.800204]  [8113a656] warn_alloc_failed+0xf6/0x150
 [ 5123.800208]  [8113e28e] __alloc_pages_nodemask+0x76e/0x9b0
 [ 5123.800213]  [81182945] ? new_slab+0x125/0x1a0
 [ 5123.800216]  [81185c2c] ? kmem_cache_alloc+0x11c/0x140
 [ 5123.800221]  [8117a66a] alloc_pages_current+0xba/0x170
 [ 5123.800239]  [a055f794] btrfs_clone_extent_buffer+0x64/0xe0 
 [btrfs]
 [ 5123.800245]  [a051fb33] btrfs_search_old_slot+0xb3/0x940 [btrfs]
 [ 5123.800252]  [810f78f7] ? call_rcu_sched+0x17/0x20
 [ 5123.800263]  [a055849e] ?
 release_extent_buffer.isra.26+0x5e/0xf0 [btrfs]
 [ 5123.800273]  [a055da17] ? free_extent_buffer+0x37/0x90 [btrfs]
 [ 5123.800280]  [a0522d5d] btrfs_next_old_leaf+0xed/0x450 [btrfs]
 [ 5123.800290]  [a05955b4] __resolve_indirect_refs+0x334/0x620 
 [btrfs]
 [ 5123.800301]  [a059616a] find_parent_nodes+0x7da/0xf90 [btrfs]
 [ 5123.800311]  [a05969b9] btrfs_find_all_roots+0x99/0x100 [btrfs]
 [ 5123.800313]  [81183beb] ? kfree+0x3b/0x150
 [ 5123.800323]  [a059991b] btrfs_qgroup_account_ref+0xfb/0x550 
 [btrfs]
 [ 5123.800325]  [81183beb] ? kfree+0x3b/0x150
 [ 5123.800332]  [a05290d3]
 btrfs_delayed_refs_qgroup_accounting+0xa3/0x100 [btrfs]
 [ 5123.800341]  [a0540941] __btrfs_end_transaction+0x81/0x410 
 [btrfs]
 [ 5123.800349]  [a052b646] ?
 btrfs_delalloc_release_metadata+0x106/0x180 [btrfs]
 [ 5123.800358]  [a0540d20] btrfs_end_transaction+0x10/0x20 [btrfs]
 [ 5123.800367]  [a054965d] btrfs_finish_ordered_io+0x10d/0x3d0 
 [btrfs]
 [ 5123.800374]  [8106a3a0] ? cascade+0xa0/0xa0
 [ 5123.800384]  [a0549935] finish_ordered_fn+0x15/0x20 [btrfs]
 [ 5123.800394]  [a056ac2f] worker_loop+0x16f/0x5d0 [btrfs]
 [ 5123.800401]  [810888a8] ? __wake_up_common+0x58/0x90
 [ 5123.800411]  [a056aac0] ? btrfs_queue_worker+0x310/0x310 [btrfs]
 [ 5123.800415]  [8107f080] kthread+0xc0/0xd0
 [ 5123.800417]  [8107efc0] ? flush_kthread_worker+0xb0/0xb0
 [ 5123.800423]  [816f452c] ret_from_fork+0x7c/0xb0
 [ 5123.800425]  [8107efc0] ? flush_kthread_worker+0xb0/0xb0
 
 I have attached meminfo/slabinfo that I peridically captured before
 running the test & while the test was going through. Also attached are
 sysrq outputs + debug-tree output after the problem happened.
 
 Can you pls check whats happening with quota-groups? Thanks.
 
 --Shyam
 
 
 Immediately after the allocation failure, there is this panic
 [ 5123.811593] [ cut here ]
 [ 5123.813996] Kernel BUG at a055f7f7 [verbose debug info unavailable]
 [ 5123.815286] invalid opcode:  [#1] SMP
 [ 5123.816131] Modules linked in: btrfs(OF) raid1 xt_multiport
 xt_tcpudp nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack
 iptable_filter ip_tables x_tables iscsi_tcp libiscsi_tcp libiscsi
 scsi_transport_iscsi xfrm_user xfrm4_tunnel tunnel4 ipcomp xfrm_ipcomp
 esp4 ah4 8021q garp stp llc bonding deflate zlib_deflate ctr
 twofish_generic twofish_x86_64_3way twofish_x86_64 twofish_common
 camellia_generic camellia_x86_64 serpent_sse2_x86_64 

Re: Heavy memory leak when using quota groups

2013-02-13 Thread Arne Jansen
On 12.02.2013 08:31, shyam btrfs wrote:
 Also immediately after this problem, its impossible to mount the
 filesystem. it consistently fails with

no luck here either. I tried a sync-heavy workload and crashed it
in the middle. mount took a while but succeeded.
Maybe your debug tree contains enough information to figure out what's
going on.

-Arne

 
 [ 2092.254428] BUG: unable to handle kernel NULL pointer dereference
 at 03c4
 [ 2092.255945] IP: [a033d0be]
 btrfs_search_old_slot+0x63e/0x940 [btrfs]
 [ 2092.257340] PGD 23d42067 PUD 3a93a067 PMD 0
 [ 2092.257982] Oops:  [#1] SMP
 [ 2092.257982] Modules linked in: raid1 xt_multiport xt_tcpudp
 nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack iptable_filter
 ip_tables x_tables iscsi_tcp libiscsi_tcp libiscsi
 scsi_transport_iscsi xfrm_user xfrm4_tunnel tunnel4 ipcomp xfrm_ipcomp
 esp4 ah4 8021q garp stp llc bonding btrfs(OF) deflate zlib_deflate ctr
 twofish_generic twofish_x86_64_3way twofish_x86_64 twofish_common
 camellia_generic camellia_x86_64 serpent_sse2_x86_64 glue_helper lrw
 serpent_generic xts gf128mul blowfish_generic blowfish_x86_64
 blowfish_common ablk_helper cryptd cast5_generic cast_common
 des_generic xcbc rmd160 crypto_null af_key xfrm_algo scst_vdisk(OF)
 iscsi_scst(OF) scst(OF) libcrc32c microcode nfsv4 psmouse nfsd(OF)
 virtio_balloon nfs_acl serio_raw auth_rpcgss nfs fscache lockd sunrpc
 lp parport floppy ixgbevf
 [ 2092.257982] CPU 0
 [ 2092.257982] Pid: 27156, comm: mount Tainted: GF  O
 3.8.0-030800rc5-generic #201301251535 Bochs Bochs
 [ 2092.257982] RIP: 0010:[a033d0be]  [a033d0be]
 btrfs_search_old_slot+0x63e/0x940 [btrfs]
 [ 2092.257982] RSP: 0018:88003752f598  EFLAGS: 00010206
 [ 2092.257982] RAX:  RBX: 0001 RCX: 
 880017826560
 [ 2092.257982] RDX: 0f83e0f83e0f83e1 RSI: 0066 RDI: 
 8800374bda00
 [ 2092.257982] RBP: 88003752f628 R08: 880019dfc000 R09: 
 88003752f508
 [ 2092.257982] R10: 000c R11:  R12: 
 880018d60800
 [ 2092.257982] R13: 88001c3bd900 R14: 88001c3ce158 R15: 
 8800
 [ 2092.257982] FS:  7fdc62688800() GS:88003fc0()
 knlGS:
 [ 2092.257982] CS:  0010 DS:  ES:  CR0: 8005003b
 [ 2092.257982] CR2: 03c4 CR3: 3a91a000 CR4: 
 06f0
 [ 2092.257982] DR0:  DR1:  DR2: 
 
 [ 2092.257982] DR3:  DR6: 0ff0 DR7: 
 0400
 [ 2092.257982] Process mount (pid: 27156, threadinfo 88003752e000,
 task 880018ea5d00)
 [ 2092.257982] Stack:
 [ 2092.257982]  88003752f5c8 88003d554480 880017826560
 880019dfc000
 [ 2092.257982]  18d60800  
 880018729498
 [ 2092.257982]  00dc 0001 88001c3ce158
 1c3bd900
 [ 2092.257982] Call Trace:
 [ 2092.257982]  [a03b23f3] __resolve_indirect_refs+0x173/0x620 
 [btrfs]
 [ 2092.257982]  [a037aa17] ? free_extent_buffer+0x37/0x90 [btrfs]
 [ 2092.257982]  [a03b316a] find_parent_nodes+0x7da/0xf90 [btrfs]
 [ 2092.257982]  [a03b39b9] btrfs_find_all_roots+0x99/0x100 [btrfs]
 [ 2092.257982]  [81183beb] ? kfree+0x3b/0x150
 [ 2092.257982]  [a03b691b] btrfs_qgroup_account_ref+0xfb/0x550 
 [btrfs]
 [ 2092.257982]  [a0346088] ?
 btrfs_delayed_refs_qgroup_accounting+0x58/0x100 [btrfs]
 [ 2092.257982]  [81183cc4] ? kfree+0x114/0x150
 [ 2092.257982]  [a03460d3]
 btrfs_delayed_refs_qgroup_accounting+0xa3/0x100 [btrfs]
 [ 2092.257982]  [a034d269] btrfs_run_delayed_refs+0x49/0x2f0 [btrfs]
 [ 2092.257982]  [a0373f43] ?
 btrfs_run_ordered_operations+0x2b3/0x2e0 [btrfs]
 [ 2092.257982]  [a035ce25] btrfs_commit_transaction+0x85/0xad0 
 [btrfs]
 [ 2092.257982]  [a033c5de] ? btrfs_search_slot+0x2fe/0x7a0 [btrfs]
 [ 2092.257982]  [8107fc70] ? add_wait_queue+0x60/0x60
 [ 2092.257982]  [81183d42] ? kmem_cache_free+0x42/0x160
 [ 2092.257982]  [a03754c1] ?
 release_extent_buffer.isra.26+0x81/0xf0 [btrfs]
 [ 2092.257982]  [a0396aa5] btrfs_recover_log_trees+0x335/0x3b0 
 [btrfs]
 [ 2092.257982]  [a03953d0] ?
 fixup_inode_link_counts+0x150/0x150 [btrfs]
 [ 2092.257982]  [a035ae96] open_ctree+0x1646/0x1d70 [btrfs]
 [ 2092.257982]  [a0333bbb] btrfs_mount+0x57b/0x670 [btrfs]
 [ 2092.257982]  [8119e543] mount_fs+0x43/0x1b0
 [ 2092.257982]  [811b92e6] vfs_kern_mount+0x76/0x120
 [ 2092.257982]  [811ba761] do_new_mount+0xb1/0x1e0
 [ 2092.257982]  [811bbf76] do_mount+0x1b6/0x1f0
 [ 2092.257982]  [811bc040] sys_mount+0x90/0xe0
 [ 2092.257982]  [816f45dd] system_call_fastpath+0x1a/0x1f
 [ 2092.257982] Code: 00 48 03 10 48 89 d0 48 ba 00 00 00 00 00 88 ff
 ff 48 c1 f8 06 48 c1 e0 0c 8b 74 10 60 49 8b 40 40 48 ba e1 

Re: Heavy memory leak when using quota groups

2013-02-13 Thread Arne Jansen
On 12.02.2013 08:31, shyam btrfs wrote:
 Also immediately after this problem, its impossible to mount the
 filesystem. it consistently fails with
 
 [ 2092.254428] BUG: unable to handle kernel NULL pointer dereference
 at 03c4
 [ 2092.255945] IP: [a033d0be]
 btrfs_search_old_slot+0x63e/0x940 [btrfs]

can you please resolve this address to a line number?

gdb btrfs.ko
(gdb) info line *btrfs_search_old_slot+0x63e

Thanks,
Arne

 [ 2092.257340] PGD 23d42067 PUD 3a93a067 PMD 0
 [ 2092.257982] Oops:  [#1] SMP
 [ 2092.257982] Modules linked in: raid1 xt_multiport xt_tcpudp
 nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack iptable_filter
 ip_tables x_tables iscsi_tcp libiscsi_tcp libiscsi
 scsi_transport_iscsi xfrm_user xfrm4_tunnel tunnel4 ipcomp xfrm_ipcomp
 esp4 ah4 8021q garp stp llc bonding btrfs(OF) deflate zlib_deflate ctr
 twofish_generic twofish_x86_64_3way twofish_x86_64 twofish_common
 camellia_generic camellia_x86_64 serpent_sse2_x86_64 glue_helper lrw
 serpent_generic xts gf128mul blowfish_generic blowfish_x86_64
 blowfish_common ablk_helper cryptd cast5_generic cast_common
 des_generic xcbc rmd160 crypto_null af_key xfrm_algo scst_vdisk(OF)
 iscsi_scst(OF) scst(OF) libcrc32c microcode nfsv4 psmouse nfsd(OF)
 virtio_balloon nfs_acl serio_raw auth_rpcgss nfs fscache lockd sunrpc
 lp parport floppy ixgbevf
 [ 2092.257982] CPU 0
 [ 2092.257982] Pid: 27156, comm: mount Tainted: GF  O
 3.8.0-030800rc5-generic #201301251535 Bochs Bochs
 [ 2092.257982] RIP: 0010:[a033d0be]  [a033d0be]
 btrfs_search_old_slot+0x63e/0x940 [btrfs]
 [ 2092.257982] RSP: 0018:88003752f598  EFLAGS: 00010206
 [ 2092.257982] RAX:  RBX: 0001 RCX: 
 880017826560
 [ 2092.257982] RDX: 0f83e0f83e0f83e1 RSI: 0066 RDI: 
 8800374bda00
 [ 2092.257982] RBP: 88003752f628 R08: 880019dfc000 R09: 
 88003752f508
 [ 2092.257982] R10: 000c R11:  R12: 
 880018d60800
 [ 2092.257982] R13: 88001c3bd900 R14: 88001c3ce158 R15: 
 8800
 [ 2092.257982] FS:  7fdc62688800() GS:88003fc0()
 knlGS:
 [ 2092.257982] CS:  0010 DS:  ES:  CR0: 8005003b
 [ 2092.257982] CR2: 03c4 CR3: 3a91a000 CR4: 
 06f0
 [ 2092.257982] DR0:  DR1:  DR2: 
 
 [ 2092.257982] DR3:  DR6: 0ff0 DR7: 
 0400
 [ 2092.257982] Process mount (pid: 27156, threadinfo 88003752e000,
 task 880018ea5d00)
 [ 2092.257982] Stack:
 [ 2092.257982]  88003752f5c8 88003d554480 880017826560
 880019dfc000
 [ 2092.257982]  18d60800  
 880018729498
 [ 2092.257982]  00dc 0001 88001c3ce158
 1c3bd900
 [ 2092.257982] Call Trace:
 [ 2092.257982]  [a03b23f3] __resolve_indirect_refs+0x173/0x620 
 [btrfs]
 [ 2092.257982]  [a037aa17] ? free_extent_buffer+0x37/0x90 [btrfs]
 [ 2092.257982]  [a03b316a] find_parent_nodes+0x7da/0xf90 [btrfs]
 [ 2092.257982]  [a03b39b9] btrfs_find_all_roots+0x99/0x100 [btrfs]
 [ 2092.257982]  [81183beb] ? kfree+0x3b/0x150
 [ 2092.257982]  [a03b691b] btrfs_qgroup_account_ref+0xfb/0x550 
 [btrfs]
 [ 2092.257982]  [a0346088] ?
 btrfs_delayed_refs_qgroup_accounting+0x58/0x100 [btrfs]
 [ 2092.257982]  [81183cc4] ? kfree+0x114/0x150
 [ 2092.257982]  [a03460d3]
 btrfs_delayed_refs_qgroup_accounting+0xa3/0x100 [btrfs]
 [ 2092.257982]  [a034d269] btrfs_run_delayed_refs+0x49/0x2f0 [btrfs]
 [ 2092.257982]  [a0373f43] ?
 btrfs_run_ordered_operations+0x2b3/0x2e0 [btrfs]
 [ 2092.257982]  [a035ce25] btrfs_commit_transaction+0x85/0xad0 
 [btrfs]
 [ 2092.257982]  [a033c5de] ? btrfs_search_slot+0x2fe/0x7a0 [btrfs]
 [ 2092.257982]  [8107fc70] ? add_wait_queue+0x60/0x60
 [ 2092.257982]  [81183d42] ? kmem_cache_free+0x42/0x160
 [ 2092.257982]  [a03754c1] ?
 release_extent_buffer.isra.26+0x81/0xf0 [btrfs]
 [ 2092.257982]  [a0396aa5] btrfs_recover_log_trees+0x335/0x3b0 
 [btrfs]
 [ 2092.257982]  [a03953d0] ?
 fixup_inode_link_counts+0x150/0x150 [btrfs]
 [ 2092.257982]  [a035ae96] open_ctree+0x1646/0x1d70 [btrfs]
 [ 2092.257982]  [a0333bbb] btrfs_mount+0x57b/0x670 [btrfs]
 [ 2092.257982]  [8119e543] mount_fs+0x43/0x1b0
 [ 2092.257982]  [811b92e6] vfs_kern_mount+0x76/0x120
 [ 2092.257982]  [811ba761] do_new_mount+0xb1/0x1e0
 [ 2092.257982]  [811bbf76] do_mount+0x1b6/0x1f0
 [ 2092.257982]  [811bc040] sys_mount+0x90/0xe0
 [ 2092.257982]  [816f45dd] system_call_fastpath+0x1a/0x1f
 [ 2092.257982] Code: 00 48 03 10 48 89 d0 48 ba 00 00 00 00 00 88 ff
 ff 48 c1 f8 06 48 c1 e0 0c 8b 74 10 60 49 8b 40 40 48 ba e1 83 0f 3e
 f8 e0 83 0f 8b 80 c4 03 00 00 48 83 e8 65 48 f7 e2 48 d1 ea 48 39 

[PATCH] Btrfs: fix crash in log replay with qgroups enabled

2013-02-13 Thread Arne Jansen
When replaying a log tree with qgroups enabled, tree_mod_log_rewind does a
sanity-check of the number of items against the maximum possible number.
It calculates that number with the nodesize of fs_root. Unfortunately
fs_root is not yet set at this stage. So instead use the nodesize from
tree_root, which is already initialized.

Signed-off-by: Arne Jansen sensi...@gmx.net
---
 fs/btrfs/ctree.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index eea5da7..6eff0fa 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1222,7 +1222,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct 
extent_buffer *eb,
 
__tree_mod_log_rewind(eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
-   BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root));
+   BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
 
return eb_rewin;
 }
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Heavy memory leak when using quota groups

2013-02-13 Thread Arne Jansen
Hi Shyam,

I sent a patch to the list

[PATCH] Btrfs: fix crash in log replay with qgroups enabled

that hopefully addresses this problem. As I haven't been able to
reproduce it, I also haven't been able to test it.
Could you please see if it fixes your problem and makes the filesystem
mountable again?

Thanks,
Arne

On 12.02.2013 08:31, shyam btrfs wrote:
 Also immediately after this problem, its impossible to mount the
 filesystem. it consistently fails with
 
 [ 2092.254428] BUG: unable to handle kernel NULL pointer dereference
 at 03c4
 [ 2092.255945] IP: [a033d0be]
 btrfs_search_old_slot+0x63e/0x940 [btrfs]
 [ 2092.257340] PGD 23d42067 PUD 3a93a067 PMD 0
 [ 2092.257982] Oops:  [#1] SMP
 [ 2092.257982] Modules linked in: raid1 xt_multiport xt_tcpudp
 nf_conntrack_ipv4 nf_defrag_ipv4 xt_state nf_conntrack iptable_filter
 ip_tables x_tables iscsi_tcp libiscsi_tcp libiscsi
 scsi_transport_iscsi xfrm_user xfrm4_tunnel tunnel4 ipcomp xfrm_ipcomp
 esp4 ah4 8021q garp stp llc bonding btrfs(OF) deflate zlib_deflate ctr
 twofish_generic twofish_x86_64_3way twofish_x86_64 twofish_common
 camellia_generic camellia_x86_64 serpent_sse2_x86_64 glue_helper lrw
 serpent_generic xts gf128mul blowfish_generic blowfish_x86_64
 blowfish_common ablk_helper cryptd cast5_generic cast_common
 des_generic xcbc rmd160 crypto_null af_key xfrm_algo scst_vdisk(OF)
 iscsi_scst(OF) scst(OF) libcrc32c microcode nfsv4 psmouse nfsd(OF)
 virtio_balloon nfs_acl serio_raw auth_rpcgss nfs fscache lockd sunrpc
 lp parport floppy ixgbevf
 [ 2092.257982] CPU 0
 [ 2092.257982] Pid: 27156, comm: mount Tainted: GF  O
 3.8.0-030800rc5-generic #201301251535 Bochs Bochs
 [ 2092.257982] RIP: 0010:[a033d0be]  [a033d0be]
 btrfs_search_old_slot+0x63e/0x940 [btrfs]
 [ 2092.257982] RSP: 0018:88003752f598  EFLAGS: 00010206
 [ 2092.257982] RAX:  RBX: 0001 RCX: 
 880017826560
 [ 2092.257982] RDX: 0f83e0f83e0f83e1 RSI: 0066 RDI: 
 8800374bda00
 [ 2092.257982] RBP: 88003752f628 R08: 880019dfc000 R09: 
 88003752f508
 [ 2092.257982] R10: 000c R11:  R12: 
 880018d60800
 [ 2092.257982] R13: 88001c3bd900 R14: 88001c3ce158 R15: 
 8800
 [ 2092.257982] FS:  7fdc62688800() GS:88003fc0()
 knlGS:
 [ 2092.257982] CS:  0010 DS:  ES:  CR0: 8005003b
 [ 2092.257982] CR2: 03c4 CR3: 3a91a000 CR4: 
 06f0
 [ 2092.257982] DR0:  DR1:  DR2: 
 
 [ 2092.257982] DR3:  DR6: 0ff0 DR7: 
 0400
 [ 2092.257982] Process mount (pid: 27156, threadinfo 88003752e000,
 task 880018ea5d00)
 [ 2092.257982] Stack:
 [ 2092.257982]  88003752f5c8 88003d554480 880017826560
 880019dfc000
 [ 2092.257982]  18d60800  
 880018729498
 [ 2092.257982]  00dc 0001 88001c3ce158
 1c3bd900
 [ 2092.257982] Call Trace:
 [ 2092.257982]  [a03b23f3] __resolve_indirect_refs+0x173/0x620 
 [btrfs]
 [ 2092.257982]  [a037aa17] ? free_extent_buffer+0x37/0x90 [btrfs]
 [ 2092.257982]  [a03b316a] find_parent_nodes+0x7da/0xf90 [btrfs]
 [ 2092.257982]  [a03b39b9] btrfs_find_all_roots+0x99/0x100 [btrfs]
 [ 2092.257982]  [81183beb] ? kfree+0x3b/0x150
 [ 2092.257982]  [a03b691b] btrfs_qgroup_account_ref+0xfb/0x550 
 [btrfs]
 [ 2092.257982]  [a0346088] ?
 btrfs_delayed_refs_qgroup_accounting+0x58/0x100 [btrfs]
 [ 2092.257982]  [81183cc4] ? kfree+0x114/0x150
 [ 2092.257982]  [a03460d3]
 btrfs_delayed_refs_qgroup_accounting+0xa3/0x100 [btrfs]
 [ 2092.257982]  [a034d269] btrfs_run_delayed_refs+0x49/0x2f0 [btrfs]
 [ 2092.257982]  [a0373f43] ?
 btrfs_run_ordered_operations+0x2b3/0x2e0 [btrfs]
 [ 2092.257982]  [a035ce25] btrfs_commit_transaction+0x85/0xad0 
 [btrfs]
 [ 2092.257982]  [a033c5de] ? btrfs_search_slot+0x2fe/0x7a0 [btrfs]
 [ 2092.257982]  [8107fc70] ? add_wait_queue+0x60/0x60
 [ 2092.257982]  [81183d42] ? kmem_cache_free+0x42/0x160
 [ 2092.257982]  [a03754c1] ?
 release_extent_buffer.isra.26+0x81/0xf0 [btrfs]
 [ 2092.257982]  [a0396aa5] btrfs_recover_log_trees+0x335/0x3b0 
 [btrfs]
 [ 2092.257982]  [a03953d0] ?
 fixup_inode_link_counts+0x150/0x150 [btrfs]
 [ 2092.257982]  [a035ae96] open_ctree+0x1646/0x1d70 [btrfs]
 [ 2092.257982]  [a0333bbb] btrfs_mount+0x57b/0x670 [btrfs]
 [ 2092.257982]  [8119e543] mount_fs+0x43/0x1b0
 [ 2092.257982]  [811b92e6] vfs_kern_mount+0x76/0x120
 [ 2092.257982]  [811ba761] do_new_mount+0xb1/0x1e0
 [ 2092.257982]  [811bbf76] do_mount+0x1b6/0x1f0
 [ 2092.257982]  [811bc040] sys_mount+0x90/0xe0
 [ 2092.257982]  [816f45dd] system_call_fastpath+0x1a/0x1f
 [ 2092.257982] 

Re: [PATCH 2/2] Btrfs: fix memory leak of pending_snapshot-inherit

2013-02-07 Thread Arne Jansen
On 02/07/13 07:02, Miao Xie wrote:
 The argument inherit of btrfs_ioctl_snap_create_transid() was assigned
 to NULL during we created the snapshots, so we didn't free it though we
 called kfree() in the caller.
 
 But since we are sure the snapshot creation is done after the function -
 btrfs_ioctl_snap_create_transid() - completes, it is safe that we don't
 assign the pointer inherit to NULL, and just free it in the caller of
 btrfs_ioctl_snap_create_transid(). In this way, the code can become more
 readable.

NAK. The snapshot creation is triggered from btrfs_commit_transaction,
I don't want to implicitly rely on commit_transaction being called for
each snapshot created. I'm not even sure the async path really commits
the transaction.
The responsibility for the creation is passed to the pending_snapshot
data structure, and so should the responsibility for the inherit struct.

-Arne

 
 Reported-by: Alex Lyakas alex.bt...@zadarastorage.com
 Cc: Arne Jansen sensi...@gmx.net
 Signed-off-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/ioctl.c | 18 +++---
  1 file changed, 7 insertions(+), 11 deletions(-)
 
 diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
 index 02d3035..40f2fbf 100644
 --- a/fs/btrfs/ioctl.c
 +++ b/fs/btrfs/ioctl.c
 @@ -367,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root *root,
 struct dentry *dentry,
 char *name, int namelen,
 u64 *async_transid,
 -   struct btrfs_qgroup_inherit **inherit)
 +   struct btrfs_qgroup_inherit *inherit)
  {
   struct btrfs_trans_handle *trans;
   struct btrfs_key key;
 @@ -401,8 +401,7 @@ static noinline int create_subvol(struct btrfs_root *root,
   if (IS_ERR(trans))
   return PTR_ERR(trans);
  
 - ret = btrfs_qgroup_inherit(trans, root-fs_info, 0, objectid,
 -inherit ? *inherit : NULL);
 + ret = btrfs_qgroup_inherit(trans, root-fs_info, 0, objectid, inherit);
   if (ret)
   goto fail;
  
 @@ -530,7 +529,7 @@ fail:
  
  static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
  char *name, int namelen, u64 *async_transid,
 -bool readonly, struct btrfs_qgroup_inherit **inherit)
 +bool readonly, struct btrfs_qgroup_inherit *inherit)
  {
   struct inode *inode;
   struct btrfs_pending_snapshot *pending_snapshot;
 @@ -549,10 +548,7 @@ static int create_snapshot(struct btrfs_root *root, 
 struct dentry *dentry,
   pending_snapshot-dentry = dentry;
   pending_snapshot-root = root;
   pending_snapshot-readonly = readonly;
 - if (inherit) {
 - pending_snapshot-inherit = *inherit;
 - *inherit = NULL;/* take responsibility to free it */
 - }
 + pending_snapshot-inherit = inherit;
  
   trans = btrfs_start_transaction(root-fs_info-extent_root, 6);
   if (IS_ERR(trans)) {
 @@ -692,7 +688,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
  char *name, int namelen,
  struct btrfs_root *snap_src,
  u64 *async_transid, bool readonly,
 -struct btrfs_qgroup_inherit **inherit)
 +struct btrfs_qgroup_inherit *inherit)
  {
   struct inode *dir  = parent-dentry-d_inode;
   struct dentry *dentry;
 @@ -1454,7 +1450,7 @@ out:
  static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
   char *name, unsigned long fd, int subvol,
   u64 *transid, bool readonly,
 - struct btrfs_qgroup_inherit **inherit)
 + struct btrfs_qgroup_inherit *inherit)
  {
   int namelen;
   int ret = 0;
 @@ -1563,7 +1559,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct 
 file *file,
  
   ret = btrfs_ioctl_snap_create_transid(file, vol_args-name,
 vol_args-fd, subvol, ptr,
 -   readonly, inherit);
 +   readonly, inherit);
  
   if (ret == 0 && ptr &&
   copy_to_user(arg +
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] Btrfs: fix memory leak of pending_snapshot-inherit

2013-02-07 Thread Arne Jansen
On 02/07/13 10:28, Miao Xie wrote:
 On Thu, 07 Feb 2013 09:43:47 +0100, Arne Jansen wrote:
 On 02/07/13 07:02, Miao Xie wrote:
 The argument inherit of btrfs_ioctl_snap_create_transid() was assigned
 to NULL during we created the snapshots, so we didn't free it though we
 called kfree() in the caller.

 But since we are sure the snapshot creation is done after the function -
 btrfs_ioctl_snap_create_transid() - completes, it is safe that we don't
 assign the pointer inherit to NULL, and just free it in the caller of
 btrfs_ioctl_snap_create_transid(). In this way, the code can become more
 readable.

 NAK. The snapshot creation is triggered from btrfs_commit_transaction,
 I don't want to implicitly rely on commit_transaction being called for
 each snapshot created. I'm not even sure the async path really commits
 the transaction.
 The responsibility for the creation is passed to the pending_snapshot
 data structure, and so should the responsibility for the inherit struct.
 
 I don't agree with you.
 
 We are sure the async path really commits the transaction because we pass 1
 as the value of the third argument into btrfs_commit_transaction_async(). It
 means we must wait for the completion of the current transaction. So Freeing
 the inherit struct in the caller is safe.

I see your point. But speaking of readability, I have to trace quite a
lot of functions to see that even the async path waits for the snapshot
to be created. Which makes the name 'async' sort of pointless.
So from what I've read so far I _think_ your patch does the right thing.
Thanks for clearing that up.

-Arne

  
 Besides that, the pending_snapshot data structure is also allocated and freed
 by the same function in fact, why not use this style for the inherit struct.
 I think it is more readable. Assigning a pointer to be NULL and freeing it
 in the caller is very strange for the people who reads the code. (It is also
 the reason why I made the mistake at the beginning.)
 
 So I think my patch is reasonable.
 
 Thanks
 Miao
 
 -Arne


 Reported-by: Alex Lyakas alex.bt...@zadarastorage.com
 Cc: Arne Jansen sensi...@gmx.net
 Signed-off-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/ioctl.c | 18 +++---
  1 file changed, 7 insertions(+), 11 deletions(-)

 diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
 index 02d3035..40f2fbf 100644
 --- a/fs/btrfs/ioctl.c
 +++ b/fs/btrfs/ioctl.c
 @@ -367,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root 
 *root,
   struct dentry *dentry,
   char *name, int namelen,
   u64 *async_transid,
 - struct btrfs_qgroup_inherit **inherit)
 + struct btrfs_qgroup_inherit *inherit)
  {
 struct btrfs_trans_handle *trans;
 struct btrfs_key key;
 @@ -401,8 +401,7 @@ static noinline int create_subvol(struct btrfs_root 
 *root,
 if (IS_ERR(trans))
 return PTR_ERR(trans);
  
 -   ret = btrfs_qgroup_inherit(trans, root-fs_info, 0, objectid,
 -  inherit ? *inherit : NULL);
 +   ret = btrfs_qgroup_inherit(trans, root-fs_info, 0, objectid, inherit);
 if (ret)
 goto fail;
  
 @@ -530,7 +529,7 @@ fail:
  
  static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
char *name, int namelen, u64 *async_transid,
 -  bool readonly, struct btrfs_qgroup_inherit **inherit)
 +  bool readonly, struct btrfs_qgroup_inherit *inherit)
  {
 struct inode *inode;
 struct btrfs_pending_snapshot *pending_snapshot;
 @@ -549,10 +548,7 @@ static int create_snapshot(struct btrfs_root *root, 
 struct dentry *dentry,
 pending_snapshot-dentry = dentry;
 pending_snapshot-root = root;
 pending_snapshot-readonly = readonly;
 -   if (inherit) {
 -   pending_snapshot-inherit = *inherit;
 -   *inherit = NULL;/* take responsibility to free it */
 -   }
 +   pending_snapshot-inherit = inherit;
  
 trans = btrfs_start_transaction(root-fs_info-extent_root, 6);
 if (IS_ERR(trans)) {
 @@ -692,7 +688,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
char *name, int namelen,
struct btrfs_root *snap_src,
u64 *async_transid, bool readonly,
 -  struct btrfs_qgroup_inherit **inherit)
 +  struct btrfs_qgroup_inherit *inherit)
  {
 struct inode *dir  = parent-dentry-d_inode;
 struct dentry *dentry;
 @@ -1454,7 +1450,7 @@ out:
  static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
 char *name, unsigned long fd, int subvol,
 u64 *transid, bool readonly,
 -   struct btrfs_qgroup_inherit **inherit)
 +   struct

Re: Leaking btrfs_qgroup_inherit on snapshot creation?

2013-02-06 Thread Arne Jansen
Hi Alex,

On 02/06/13 12:18, Alex Lyakas wrote:
 Hi Jan, Arne,
 I see this code in create_snapshot:
 
   if (inherit) {
   pending_snapshot-inherit = *inherit;
   *inherit = NULL;/* take responsibility to free it */
   }
 
 So, first thing I think it should be:
 if (*inherit)
 because in btrfs_ioctl_snap_create_v2() we have:
 struct btrfs_qgroup_inherit *inherit = NULL;
 ...
 btrfs_ioctl_snap_create_transid(..., inherit)
 
 so the current check is very unlikely to be NULL.

But in btrfs_ioctl_snap_create it is called with NULL, so *inherit would
dereference a NULL pointer.

 
 Second, I don't see anybody freeing pending_snapshot->inherit. I guess
 it should be freed after calling btrfs_qgroup_inherit() and also in
 btrfs_destroy_pending_snapshots().

You're right. In our original version (6f72c7e20dbaea5) it was still
there, in transaction.c. It has been removed in 6fa9700e734:

commit 6fa9700e734275de2acbcb0e99414bd7ddfc60f1
Author: Miao Xie mi...@cn.fujitsu.com
Date:   Thu Sep 6 04:00:32 2012 -0600

Btrfs: fix error path in create_pending_snapshot()

This patch fixes the following problem:
- If we failed to deal with the delayed dir items, we should abort
transaction,
  just as its comment said. Fix it.
- If root reference or root back reference insertion failed, we should
  abort transaction. Fix it.
- Fix the double free problem of pending->inherit.
- Do not restore the trans->rsv if we doesn't change it.
- make the error path more clearly.

Signed-off-by: Miao Xie mi...@cn.fujitsu.com

Miao, can you please explain where you see a double free?

-Arne


 Thanks,
 Alex.
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Open for contribution towards Btrfs testing

2013-01-24 Thread Arne Jansen
Hi,

On 24.01.2013 06:10, praneeth u wrote:
 Hello,
 We are team of 5 students, interns at Green turtles technologies,
 interested in contributing to btrfs.
 Is there any space for contribution in btrfs testing? We will be updating
 progress twice a week.
 Need suggestions on how to proceed.

The quota subsystem (qgroups) is new and there are no tests for
it yet. Tests for space tracking and limiting are needed. Also
hierarchical quotas need testing.
Stress testing would be useful, too.
Would you be interested in that?

-Arne

 
 
 --
 Praneeth U
 9448804728
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Warnings on file removal after Quota exceeded error (EDQUOT)

2013-01-24 Thread Arne Jansen
On 24.01.2013 10:39, Lev Vainblat wrote:
 Hi all,
 
 I executed the following simple sequence of commands:
 
 # mount /dev/sda /mnt/btrfs
 # btrfs quota enable /mnt/btrfs/
 # btrfs subvolume create /mnt/btrfs/SV
 Create subvolume '/mnt/btrfs/SV'
 # btrfs qgroup limit 1m /mnt/btrfs/SV
 # dd if=/dev/zero of=/mnt/btrfs/SV/file bs=64x1024
 dd: writing `/mnt/btrfs/SV/file': Disk quota exceeded
 16+0 records in
 15+0 records out
 983040 bytes (983 kB) copied, 0.00192474 s, 511 MB/s
 # rm /mnt/btrfs/SV/file
 
 The file was removed, but in the kern.log I see:
 

[snip]

 
 Is this an expected behavior? Am I doing anything wrong here?

I can reproduce it here, will look into it. Thanks for reporting!

-Arne

 
 Thanks,
 -Lev. 
 
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Quota reached: can't delete

2013-01-24 Thread Arne Jansen
On 24.01.2013 16:12, Jerome M wrote:
 Hi,
 
 With the current btrfs quota implementation, when you reach a
 subvolume quota limit, you can't delete anything without first
 removing the limit or enlarging it:
 
 rm: cannot remove `testfile.bin': Disk quota exceeded
 
 
 Is there any plan to change that?

Yes, there is. The problem is that even deletion needs space.
So we need to allow removal to go over quota. The current implementation
doesn't make this distinction.

-Arne

 
 Thanks,
 Jerome
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Rendering a btrfs filesystem unmountable with the btrfs command

2013-01-17 Thread Arne Jansen
Hi Eric,

thanks for reporting this. I sent a small patch series to the list
to fix this.
Sorry I forgot to CC you, will send the patches to you directly again.
It would be great if you could give it some testing.

Thanks,
Arne

On 15.01.2013 21:44, hop...@omnifarious.org wrote:
 mkfs.btrfs /dev/sdb
 mkdir /tmp/mnt
 mount /dev/sdb /tmp/mnt
 cd /tmp/mnt
 btrfs quota enable .
 btrfs subvol create foo
 btrfs qgroup create 1/0
 btrfs qgroup assign 0/257 1/0
 btrfs subvol snapshot foo bar
 btrfs qgroup assign 0/258 1/0
 cd ..
 umount /dev/sdb
 mount /dev/sdb /tmp/mnt
 # Still mountable!
 cd mnt
 btrfs qgroup destroy 1/0
 cd ..
 umount /dev/sdb
 mount /dev/sdb /tmp/mnt
 # Oops, no longer mountable, even in recovery mode!
 
 Help!  BTW, I'm not a mailing list subscriber.
 
 Thanks,

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/4] btrfs-progs: better support for external users of send, V2

2013-01-16 Thread Arne Jansen
Hi Mark,

On 16.01.2013 23:30, Mark Fasheh wrote:
 Hi,
 
   The following 4 patches make changes to btrfs-progs in order to
 provide support for external software that wants to make use of the
 excellent btrfs send ioctl.
 
 The first patch introduces support for the BTRFS_SEND_FLAG_NO_FILE_DATA flag
 which is introduced in my kernel patch titled:
 
   btrfs: add no file data flag to btrfs send ioctl
 
 which can be found on the btrfs list, and for convenience is also attached
 at the end of this e-mail.
 
 The 2nd patch creates a libbtrfs and links the rest of the build to it. The
 functionality I chose to export as of right now centers on send support. 
 With this library, an external program has a much easier time processing the
 stream which a send ioctl provides. It's worth noting btw that this patch
 can stand alone if need be.

Splitting out the send/receive-specific parts into a lib is a great idea.
The original motivation behind our send stream format was to make it readily
receivable on different filesystems.
For this we need a generic receiver which could be based on the lib. But to
make this possible, all btrfs-specific parts need to be kept out of it, so
it can readily compile on BSD for example.
So it might make sense to split out 2 parts, one for the pure receive
functionality and one with the btrfs-specific parts.
The former lib could be named libfar, as this is the name we want to give
the stream format to make it independent from btrfs. FAR stands for
Filesystem Agnostic Replication. There are senders for other systems (especially
zfs) in preparation.
I don't know if this affects your efforts in any way, but it might be easiest
to do the split right away while you're at it :)

Thanks,
Arne

 
 The 3rd patch introduces send-test, a small piece of software (not built by
 default) to allow for testing of the send ioctl (including our new flag). As
 send-test is a client of libbtrfs it might also serve as example code for
 developers looking to make use of send.
 
 The final patch makes minor changes so that libbtrfs is usable from C++.
 
 The patches can also be viewed on github:
 
 https://github.com/markfasheh/btrfs-progs-patches/tree/no-data-and-libify
 
 Testing has been pretty straight-forward - I build the software, verify that
 things work by making a file system or using send-test.
 
 Please review. Thanks,
   --Mark
 
 Changelog:
 
 - Fixed whitespace error in patch 3 (thanks to Anand Jain for reporting)
 
 - make version; make install should work now (again, thanks to Anand Jain)
 
 - included patch by Arvin Schnell to make it possible to use libbtrfs from C++
   - From this patch I removed some code from cmds-send.c that was added
 by mistake.
 
 - libbtrfs properly links to libuuid and libm (Reported by Arvin)
 
 - library symlinks are now properly installed (Reported by Arvin)
 
 
 From: Mark Fasheh mfas...@suse.de
 
 [PATCH] btrfs: add no file data flag to btrfs send ioctl
 
 This patch adds the flag, BTRFS_SEND_FLAG_NO_FILE_DATA to the btrfs send
 ioctl code. When this flag is set, the btrfs send code will never write file
 data into the stream (thus also avoiding expensive reads of that data in the
 first place). BTRFS_SEND_C_UPDATE_EXTENT commands will be sent (instead of
 BTRFS_SEND_C_WRITE) with an offset, length pair indicating the extent in
 question.
 
 This patch does not affect the operation of BTRFS_SEND_C_CLONE commands -
 they will continue to be sent when a search finds an appropriate extent to
 clone from.
 
 Signed-off-by: Mark Fasheh mfas...@suse.de
 ---
  fs/btrfs/ioctl.h |7 +++
  fs/btrfs/send.c  |   48 
  fs/btrfs/send.h  |1 +
  3 files changed, 52 insertions(+), 4 deletions(-)
 
 diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
 index 731e287..1f6cfdd 100644
 --- a/fs/btrfs/ioctl.h
 +++ b/fs/btrfs/ioctl.h
 @@ -363,6 +363,13 @@ struct btrfs_ioctl_received_subvol_args {
   __u64   reserved[16];   /* in */
  };
  
 +/*
 + * Caller doesn't want file data in the send stream, even if the
 + * search of clone sources doesn't find an extent. UPDATE_EXTENT
 + * commands will be sent instead of WRITE commands.
 + */
 +#define BTRFS_SEND_FLAG_NO_FILE_DATA 0x1
 +
  struct btrfs_ioctl_send_args {
   __s64 send_fd;  /* in */
   __u64 clone_sources_count;  /* in */
 diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
 index e78b297..8d0c6b4 100644
 --- a/fs/btrfs/send.c
 +++ b/fs/btrfs/send.c
 @@ -85,6 +85,7 @@ struct send_ctx {
   u32 send_max_size;
   u64 total_send_size;
   u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
 + u64 flags;  /* 'flags' member of btrfs_ioctl_send_args is u64 */
  
   struct vfsmount *mnt;
  
 @@ -3707,6 +3708,39 @@ out:
   return ret;
  }
  
 +/*
 + * Send an update extent command to user space.
 + */
 +static int send_update_extent(struct send_ctx *sctx,
 +   u64 offset, u32 len)
 

Re: About btrfs qgroup import/export command

2013-01-09 Thread Arne Jansen
On 09.01.2013 11:17, Miao Xie wrote:
 Hi, Arne
 
 On Wed, 19 Dec 2012 12:40:25 +0100, Arne Jansen wrote:
 On 19.12.2012 12:25, Miao Xie wrote:
 As we know, there is no backup function for qgroup. When a problem
 occurs, users must recover the qgroup configuration manually, which is not
 convenient. Besides that, some users might want to import an existing
 qgroup configuration into a new filesystem. Btrfs does not have such a
 function, so it can only be done manually.

 So we want to implement btrfs qgroup import/export commands.
 1)'btrfs qgroup export' commands will export qgroup tree
   into a user's specified file.(stdout by default)

 2)user may modify the configuration file firstly and then
   import it into the filesystem.(by 'btrfs qgroup import' command)

 The file may be formatted as follows:

 Qgroupid is_compressed is_exclusive   limited_sizeparent
 --
  0/10 0  10G1/0
  1/01 1  20G---
   
  If 'is_exclusive' is set, 'limited_size' corresponds to max exclusive size,
  else max referenced size. Here 'parent' exclude ancestral qgroups. 

 Is there any comment about this idea? 

 The configuration only really makes sense in combination with the existing
 subvolumes. Even if the target has subvolumes under the same name, they
 might have different internal IDs. So it might make more sense to address
 the level 0 qgroups by name.
 
 Good idea.
 
 Also it might be misleading to apply a configuration to an existing fs, as
 it currently is not possible to get a correct accounting if the fs is not
 empty. Rescan is not yet implemented.
 
 Rescan will be implemented in the future, so it is not a main problem
 to implement 'btrfs qgroup import/export' commands.
 
 So instead of just saving and restoring the qgroup config, it might make
 more sense to create a new filesystem including all subvolumes and quota
 config from a config file.
 But, I'm not completely convinced that this is a feature that is needed
 frequently. If I want a standard deployment, I simply write a script that
 creates the fs + subvol + quota.
 
 If users want to configure some qgroups (reset the limited size,
 modify their ancestral qgroups), I think it is more convenient and flexible
 to use import/export commands than to write a script.
 
 
 Above all,our qgroup import/export commands will be implemented as follows:
 
 qgroupid  is_compressed  is_exclusive  limited_size   parent   full_path
 
 
 And we may specify matching degree when we import the qgroup information.
 
 1) Strict matching
   A level-0 qgroup must match a subvolume/snapshot's objectid and full path
   exactly. If a qgroup fails to match, the process will exit.
 
 2) General matching
   A level-0 qgroup only needs to match a subvolume/snapshot's full path.
   If the corresponding subvolume/snapshot does not exist, skip it.
   Otherwise, apply the modifications to the corresponding subvolume/snapshot
   qgroup.
 
 3) Weak matching
   A level-0 qgroup only needs to match a subvolume/snapshot's full path.
   If the corresponding subvolume/snapshot does not exist, create the
   subvolume automatically (a tracking qgroup is also created automatically)
   and then apply the modifications to the newly created tracking qgroup.
 
 
 What do you think about the above idea?

I still have problems imagining a use case for this. In our setup we have
lots of subvolumes with a quota configuration that follows some rules, but
it won't be possible to just import/export from one machine to the other.
So you have to design the tool to your needs.
There's another essential tool that's still missing with regard to quota
which I'd love to see come to life:
Currently the configuration of the tracking qgroups is completely left to
the user. This requires the user to have a deep understanding of how qgroups work.
It would be great if we could come up with a simple description language
where the user just describes what he wants to achieve and the tool calculates
the tracking groups itself. It could also contain a templating mechanism
that might cover your use case.
A description might contain information e.g. which subvols to group, from which
subvols the user intends to take snapshots in the future and in which groups
those snapshots will be put. My pdf gives some example use cases which should
be possible to cover.
That's not exactly what you have in mind, but maybe it is possible to cover
both needs with one tool.

-Arne

 
 Thanks
 Miao
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body

Re: Open for contribution towards xfstests for btrfs

2013-01-07 Thread Arne Jansen
On 07.01.2013 14:01, Liu Bo wrote:
 On Mon, Jan 07, 2013 at 03:48:43PM +0530, Kiran Patil wrote:
 Hello,

 We have a team of 5 students who would like to contribute to btrfs
 filesystem testing using xfstests.

 Is there space for them to contribute?
 
 xfstests is lacking test cases targeting btrfs send/receive;
 you may be interested in that.

There is even a basis for that already, we have a testsuite for the
zfs send - btrfs receive feature at

git://git.kernel.org/pub/scm/linux/kernel/git/arne/far-progs.git

see test.pl. It can also already be used to test btrfs send -
btrfs receive.

-Arne

 
 thanks,
 liubo
 

 If yes, with whom should they keep in touch for guidance?

 Thank you,
 Kiran Patil (Director)
 Green Turtles Technologies, INDIA.
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: About btrfs qgroup import/export command

2012-12-19 Thread Arne Jansen
On 19.12.2012 12:25, Miao Xie wrote:
 Hi, everyone.
 
 As we know, there is no backup function for qgroup. When a problem
 occurs, users must recover the qgroup configuration manually, which is not
 convenient. Besides that, some users might want to import an existing
 qgroup configuration into a new filesystem. Btrfs does not have such a
 function, so it can only be done manually.
 
 So we want to implement btrfs qgroup import/export commands.
 1)'btrfs qgroup export' commands will export qgroup tree
   into a user's specified file.(stdout by default)
 
 2)user may modify the configuration file firstly and then
   import it into the filesystem.(by 'btrfs qgroup import' command)
 
 The file may be formated as the following:
 
 Qgroupid is_compressed is_exclusive   limited_sizeparent
 --
  0/10 0  10G1/0
  1/01 1  20G---
   
  If 'is_exclusive' is set, 'limited_size' corresponds to max exclusive size,
  else max referenced size. Here 'parent' exclude ancestral qgroups. 
 
 Is there any comment about this idea? 

The configuration only really makes sense in combination with the existing
subvolumes. Even if the target has subvolumes under the same name, they
might have different internal IDs. So it might make more sense to address
the level 0 qgroups by name.
Also it might be misleading to apply a configuration to an existing fs, as
it currently is not possible to get a correct accounting if the fs is not
empty. Rescan is not yet implemented.
So instead of just saving and restoring the qgroup config, it might make
more sense to create a new filesystem including all subvolumes and quota
config from a config file.
But, I'm not completely convinced that this is a feature that is needed
frequently. If I want a standard deployment, I simply write a script that
creates the fs + subvol + quota.

-Arne

 
 Thanks
 Miao
 
 
 
 
 
 
 
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: find-new possibility of showing modified and deleted files/directories

2012-11-01 Thread Arne Jansen
On 01.11.2012 12:00, Gabriel wrote:
 On Thu, 01 Nov 2012 06:06:57 +0100, Arne Jansen wrote:
 On 11/01/2012 02:28 AM, Shane Spencer wrote:
 That's Plan B.  I'll be making a btrfs stream decoder and doing in
 place edits.  I need to move stuff around to other filesystem types
 otherwise I'd just store the stream or apply the stream to a remote
 snapshot.
 
 That's the whole point of the btrfs-send design: It's very easy to
 receive on different filesystems. A generic receiver is in preparation.
 And to make it even more generic: A sender using the same stream format
 is also in preparation for zfs.
 
 Consider the rsync bundle format as well.
 That should provide interoperability with any filesystem.

Rsync is an interactive protocol. The idea with send/receive is that
the stream can be generated without any interactions with receiver.
You can store the stream somewhere, or replay it to many destinations.

 
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: find-new possibility of showing modified and deleted files/directories

2012-10-31 Thread Arne Jansen
On 11/01/2012 02:28 AM, Shane Spencer wrote:
 That's Plan B.  I'll be making a btrfs stream decoder and doing in
 place edits.  I need to move stuff around to other filesystem types
 otherwise I'd just store the stream or apply the stream to a remote
 snapshot.

That's the whole point of the btrfs-send design: It's very easy to
receive on different filesystems. A generic receiver is in preparation.
And to make it even more generic: A sender using the same stream
format is also in preparation for zfs.

 
 On Wed, Oct 31, 2012 at 4:13 PM, cwillu cwi...@gmail.com wrote:
 Probably easier to decode the btrfs-send stream, or even just use btrfs-send
 itself instead.
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: help with btrfs send

2012-10-24 Thread Arne Jansen
On 10/24/2012 06:53 PM, frantz.hacquard wrote:
 Hi,
 
 I don't know if this is the right place to ask my question.
 I'm a French student who studies IT.
 I'm working on a project with the btrfs filesystem on 64-bit Gentoo.
 I have to create snapshots and compare the differences between them
 (new files, deleted files, modified files...).
 So I tried to use btrfs send -i old-snap new-snap.
 To make this command work, I created my snapshots with the -r
 option.
 However, I only see many weird characters on the standard output, and I
 don't understand them.

If you want to dissect the contents, you can pull the tool fardump
from here:

git://git.kernel.org/pub/scm/linux/kernel/git/arne/far-progs.git

'far' is the name we're going to give the stream format.

-arne

 I know it's still in development, but is it working?
 I tried with kernel 3.6.2 and 3.7.0
 
 Thanks for help.
 -- 
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Fits: tool to parse stream

2012-10-22 Thread Arne Jansen
On 15.10.2012 16:32, Chris Mason wrote:
 On Sat, Oct 13, 2012 at 09:41:28AM -0600, David Sterba wrote:
 On Sat, Oct 13, 2012 at 09:08:57AM +0100, Rory Campbell-Lange wrote:
 Perhaps BTRFS Incremental Stream or Backup Incremental Stream should
 be considered, with the file extension .bis.

 From the brainstorming we had about the name, the intention behind the
 stream is to be filesystem independent.

 So this might be named FIS with .fis extension, that is not far from the
 proposed name. However, this hasn't been approved by the Slang committee
 yet :)
 
 urbandictionary has some entries for fis, although I'm not really sure
 that matters.  .fs is still my favorite, or .fss if you want to have the
 word stream in there.

We finally settled on 'far', Filesystem Agnostic Replication.

-Arne

 
 The stream parser is really cool though, thanks Arne.
 
 -chris
 
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs-progs: fix unaligned accesses

2012-10-21 Thread Arne Jansen
There are some unaligned accesses in progs that cause malfunction or
crashes on ARM.
This patch fixes the ones we stumbled upon.

Signed-off-by: Arne Jansen sensi...@gmx.net
---
 btrfs-list.c |   69 +++--
 volumes.c|8 --
 2 files changed, 38 insertions(+), 39 deletions(-)

diff --git a/btrfs-list.c b/btrfs-list.c
index e5f0f96..cb42fbc 100644
--- a/btrfs-list.c
+++ b/btrfs-list.c
@@ -720,7 +720,7 @@ static u64 find_root_gen(int fd)
int ret;
struct btrfs_ioctl_search_args args;
struct btrfs_ioctl_search_key *sk = args.key;
-   struct btrfs_ioctl_search_header *sh;
+   struct btrfs_ioctl_search_header sh;
unsigned long off = 0;
u64 max_found = 0;
int i;
@@ -771,22 +771,21 @@ static u64 find_root_gen(int fd)
off = 0;
for (i = 0; i  sk-nr_items; i++) {
struct btrfs_root_item *item;
-   sh = (struct btrfs_ioctl_search_header *)(args.buf +
- off);
 
-   off += sizeof(*sh);
+   memcpy(sh, args.buf + off, sizeof(sh));
+   off += sizeof(sh);
item = (struct btrfs_root_item *)(args.buf + off);
-   off += sh-len;
+   off += sh.len;
 
-   sk-min_objectid = sh-objectid;
-   sk-min_type = sh-type;
-   sk-min_offset = sh-offset;
+   sk-min_objectid = sh.objectid;
+   sk-min_type = sh.type;
+   sk-min_offset = sh.offset;
 
-   if (sh-objectid  ino_args.treeid)
+   if (sh.objectid  ino_args.treeid)
break;
 
-   if (sh-objectid == ino_args.treeid 
-   sh-type == BTRFS_ROOT_ITEM_KEY) {
+   if (sh.objectid == ino_args.treeid 
+   sh.type == BTRFS_ROOT_ITEM_KEY) {
max_found = max(max_found,
btrfs_root_generation(item));
}
@@ -1009,7 +1008,7 @@ static int __list_subvol_search(int fd, struct 
root_lookup *root_lookup)
int ret;
struct btrfs_ioctl_search_args args;
struct btrfs_ioctl_search_key *sk = args.key;
-   struct btrfs_ioctl_search_header *sh;
+   struct btrfs_ioctl_search_header sh;
struct btrfs_root_ref *ref;
struct btrfs_root_item *ri;
unsigned long off = 0;
@@ -1064,23 +1063,22 @@ static int __list_subvol_search(int fd, struct 
root_lookup *root_lookup)
 * read the root_ref item it contains
 */
for (i = 0; i  sk-nr_items; i++) {
-   sh = (struct btrfs_ioctl_search_header *)(args.buf +
- off);
-   off += sizeof(*sh);
-   if (sh-type == BTRFS_ROOT_BACKREF_KEY) {
+   memcpy(sh, args.buf + off, sizeof(sh));
+   off += sizeof(sh);
+   if (sh.type == BTRFS_ROOT_BACKREF_KEY) {
ref = (struct btrfs_root_ref *)(args.buf + off);
name_len = btrfs_stack_root_ref_name_len(ref);
name = (char *)(ref + 1);
dir_id = btrfs_stack_root_ref_dirid(ref);
 
-   add_root(root_lookup, sh-objectid, sh-offset,
+   add_root(root_lookup, sh.objectid, sh.offset,
 0, 0, dir_id, name, name_len, 0, 0, 0,
 NULL);
-   } else if (sh-type == BTRFS_ROOT_ITEM_KEY) {
+   } else if (sh.type == BTRFS_ROOT_ITEM_KEY) {
ri = (struct btrfs_root_item *)(args.buf + off);
gen = btrfs_root_generation(ri);
flags = btrfs_root_flags(ri);
-   if(sh-len 
+   if(sh.len 
   sizeof(struct btrfs_root_item_v0)) {
t = ri-otime.sec;
ogen = btrfs_root_otransid(ri);
@@ -1091,20 +1089,20 @@ static int __list_subvol_search(int fd, struct 
root_lookup *root_lookup)
memset(uuid, 0, BTRFS_UUID_SIZE);
}
 
-   add_root(root_lookup, sh-objectid, 0,
-sh-offset, flags, 0, NULL, 0, ogen,
+   add_root(root_lookup, sh.objectid, 0

[PATCH v2] Btrfs-progs: fix unaligned accesses

2012-10-21 Thread Arne Jansen
There are some unaligned accesses in progs that cause malfunction or
crashes on ARM.
This patch fixes the ones we stumbled upon.

Signed-off-by: Arne Jansen sensi...@gmx.net
---

Change v1-v2:
Somehow sent the wrong patch without the patch to the setget functions.

---
 btrfs-list.c |   69 +++--
 ctree.h  |8 +-
 volumes.c|8 --
 3 files changed, 44 insertions(+), 41 deletions(-)

diff --git a/btrfs-list.c b/btrfs-list.c
index e5f0f96..cb42fbc 100644
--- a/btrfs-list.c
+++ b/btrfs-list.c
@@ -720,7 +720,7 @@ static u64 find_root_gen(int fd)
int ret;
struct btrfs_ioctl_search_args args;
struct btrfs_ioctl_search_key *sk = args.key;
-   struct btrfs_ioctl_search_header *sh;
+   struct btrfs_ioctl_search_header sh;
unsigned long off = 0;
u64 max_found = 0;
int i;
@@ -771,22 +771,21 @@ static u64 find_root_gen(int fd)
off = 0;
for (i = 0; i  sk-nr_items; i++) {
struct btrfs_root_item *item;
-   sh = (struct btrfs_ioctl_search_header *)(args.buf +
- off);
 
-   off += sizeof(*sh);
+   memcpy(sh, args.buf + off, sizeof(sh));
+   off += sizeof(sh);
item = (struct btrfs_root_item *)(args.buf + off);
-   off += sh-len;
+   off += sh.len;
 
-   sk-min_objectid = sh-objectid;
-   sk-min_type = sh-type;
-   sk-min_offset = sh-offset;
+   sk-min_objectid = sh.objectid;
+   sk-min_type = sh.type;
+   sk-min_offset = sh.offset;
 
-   if (sh-objectid  ino_args.treeid)
+   if (sh.objectid  ino_args.treeid)
break;
 
-   if (sh-objectid == ino_args.treeid 
-   sh-type == BTRFS_ROOT_ITEM_KEY) {
+   if (sh.objectid == ino_args.treeid 
+   sh.type == BTRFS_ROOT_ITEM_KEY) {
max_found = max(max_found,
btrfs_root_generation(item));
}
@@ -1009,7 +1008,7 @@ static int __list_subvol_search(int fd, struct 
root_lookup *root_lookup)
int ret;
struct btrfs_ioctl_search_args args;
struct btrfs_ioctl_search_key *sk = args.key;
-   struct btrfs_ioctl_search_header *sh;
+   struct btrfs_ioctl_search_header sh;
struct btrfs_root_ref *ref;
struct btrfs_root_item *ri;
unsigned long off = 0;
@@ -1064,23 +1063,22 @@ static int __list_subvol_search(int fd, struct 
root_lookup *root_lookup)
 * read the root_ref item it contains
 */
for (i = 0; i  sk-nr_items; i++) {
-   sh = (struct btrfs_ioctl_search_header *)(args.buf +
- off);
-   off += sizeof(*sh);
-   if (sh-type == BTRFS_ROOT_BACKREF_KEY) {
+   memcpy(sh, args.buf + off, sizeof(sh));
+   off += sizeof(sh);
+   if (sh.type == BTRFS_ROOT_BACKREF_KEY) {
ref = (struct btrfs_root_ref *)(args.buf + off);
name_len = btrfs_stack_root_ref_name_len(ref);
name = (char *)(ref + 1);
dir_id = btrfs_stack_root_ref_dirid(ref);
 
-   add_root(root_lookup, sh-objectid, sh-offset,
+   add_root(root_lookup, sh.objectid, sh.offset,
 0, 0, dir_id, name, name_len, 0, 0, 0,
 NULL);
-   } else if (sh-type == BTRFS_ROOT_ITEM_KEY) {
+   } else if (sh.type == BTRFS_ROOT_ITEM_KEY) {
ri = (struct btrfs_root_item *)(args.buf + off);
gen = btrfs_root_generation(ri);
flags = btrfs_root_flags(ri);
-   if(sh-len 
+   if(sh.len 
   sizeof(struct btrfs_root_item_v0)) {
t = ri-otime.sec;
ogen = btrfs_root_otransid(ri);
@@ -1091,20 +1089,20 @@ static int __list_subvol_search(int fd, struct 
root_lookup *root_lookup)
memset(uuid, 0, BTRFS_UUID_SIZE);
}
 
-   add_root(root_lookup, sh-objectid, 0

[PATCH] Fits: tool to hash a filesystem into a single sum

2012-10-20 Thread Arne Jansen
The tool can also generate a manifest file. Given the manifest file, it
can determine which files are missing or in excess, and inform about
metadata / data checksum mismatches.

It builds for linux and solaris.

Signed-off-by: Arne Jansen sensi...@gmx.net
---

It can also be found at

git://git.kernel.org/pub/scm/linux/kernel/git/arne/fits-progs.git

---
 Makefile |   12 +-
 fssum.c  |  638 ++
 2 files changed, 649 insertions(+), 1 deletions(-)
 create mode 100644 fssum.c

diff --git a/Makefile b/Makefile
index 1c56476..5584f07 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,14 @@
-all: fitsparse
+OS = $(shell uname -s)
+ifeq "$(OS)" "Linux"
+CFLAGS = -D__LINUX__
+else
+CFLAGS = -D__SOLARIS__
+endif
+
+all: fitsparse fssum
 
 fitsparse: fitsparse.c
gcc -Wall fitsparse.c -o fitsparse
+
+fssum: fssum.c
+   gcc -Wall $(CFLAGS) fssum.c -o fssum -lssl -lcrypto
diff --git a/fssum.c b/fssum.c
new file mode 100644
index 000..491baef
--- /dev/null
+++ b/fssum.c
@@ -0,0 +1,638 @@
+/*
+ * Copyright (C) 2012 STRATO AG.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#ifdef __LINUX__
+#define _BSD_SOURCE
+#define _LARGEFILE64_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef __SOLARIS__
+#include <sys/mkdev.h>
+#endif
+#include <openssl/md5.h>
+#include <netinet/in.h>
+#include <inttypes.h>
+
+#define CS_SIZE 16
+#define CHUNKS 128
+
+#ifdef __LINUX__
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define htonll(x) __bswap_64 (x)
+#endif
+#endif
+
+/* TODO: add hardlink recognition */
+/* TODO: add xattr/acl */
+
+int gen_manifest = 0;
+int in_manifest = 0;
+char *checksum = NULL;
+FILE *out_fp;
+FILE *in_fp;
+
+enum _flags {
+   FLAG_UID,
+   FLAG_GID,
+   FLAG_MODE,
+   FLAG_ATIME,
+   FLAG_MTIME,
+   FLAG_CTIME,
+   FLAG_DATA,
+   NUM_FLAGS
+};
+
+const char flchar[] = ugoamcd;
+char line[65536];
+
+int flags[NUM_FLAGS] = { 1, 1, 1, 1, 1, 0, 1 };
+
+char *
+getln(char *buf, int size, FILE *fp)
+{
+   char *p;
+   int l;
+
+   p = fgets(buf, size, fp);
+   if (!p)
+   return NULL;
+
+   l = strlen(p);
+   while(l > 0 && (p[l - 1] == '\n' || p[l - 1] == '\r'))
+   p[--l] = 0;
+
+   return p;
+}
+
+void
+parse_flag(int c)
+{
+   int i;
+   int is_upper = 0;
+
+   if (c >= 'A' && c <= 'Z') {
+   is_upper = 1;
+   c += 'a' - 'A';
+   }
+   for (i = 0; flchar[i]; ++i) {
+   if (flchar[i] == c) {
+   flags[i] = is_upper ? 0 : 1;
+   return;
+   }
+   }
+   fprintf(stderr, unrecognized flag %c\n, c);
+   exit(-1);
+}
+
+void
+parse_flags(char *p)
+{
+   while (*p)
+   parse_flag(*p++);
+}
+
+void
+usage(void)
+{
+   fprintf(stderr, usage: fssum options path\n);
+   fprintf(stderr,   options:\n);
+   fprintf(stderr, -f  : write out a full manifest file\n);
+   fprintf(stderr, -w file   : send output to file\n);
+   fprintf(stderr,
+   -r file   : read checksum or manifest from file\n);
+   fprintf(stderr, -[ugoamcd]  : specify which fields to include in 
checksum calculation.\n);
+   fprintf(stderr,  u  : include uid\n);
+   fprintf(stderr,  g  : include gid\n);
+   fprintf(stderr,  o  : include mode\n);
+   fprintf(stderr,  m  : include mtime\n);
+   fprintf(stderr,  a  : include atime\n);
+   fprintf(stderr,  c  : include ctime\n);
+   fprintf(stderr,  d  : include file data\n);
+   fprintf(stderr, -[UGOAMCD]  : exclude respective field from 
calculation\n);
+   fprintf(stderr, -n  : reset all flags\n);
+   fprintf(stderr, -N  : set all flags\n);
+   fprintf(stderr, -h  : this help\n\n);
+   fprintf(stderr, The default field mask is ugoamCd. If the 
checksum/manifest is read from a\n);
+   fprintf(stderr, file, the mask is taken from there and the values 
given on the command line\n);
+   fprintf(stderr, are ignored.\n);
+   exit(-1

[PATCH] Fits: tool to hash a filesystem into a single sum

2012-10-19 Thread Arne Jansen
The tool can also generate a manifest file. Given the manifest file, it
can determine which files are missing or in excess, and inform about
metadata / data checksum mismatches.

It builds for linux and solaris.

Signed-off-by: Arne Jansen sensi...@gmx.net
---

It can also be found at

git://git.kernel.org/pub/scm/linux/kernel/git/arne/fits-progs.git

---
 Makefile |   12 +-
 fssum.c  |  638 ++
 2 files changed, 649 insertions(+), 1 deletions(-)
 create mode 100644 fssum.c

diff --git a/Makefile b/Makefile
index 1c56476..5584f07 100644
--- a/Makefile
+++ b/Makefile
@@ -1,4 +1,14 @@
-all: fitsparse
+OS = $(shell uname -s)
+ifeq "$(OS)" "Linux"
+CFLAGS = -D__LINUX__
+else
+CFLAGS = -D__SOLARIS__
+endif
+
+all: fitsparse fssum
 
 fitsparse: fitsparse.c
gcc -Wall fitsparse.c -o fitsparse
+
+fssum: fssum.c
+   gcc -Wall $(CFLAGS) fssum.c -o fssum -lssl -lcrypto
diff --git a/fssum.c b/fssum.c
new file mode 100644
index 000..491baef
--- /dev/null
+++ b/fssum.c
@@ -0,0 +1,638 @@
+/*
+ * Copyright (C) 2012 STRATO AG.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#ifdef __LINUX__
+#define _BSD_SOURCE
+#define _LARGEFILE64_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef __SOLARIS__
+#include <sys/mkdev.h>
+#endif
+#include <openssl/md5.h>
+#include <netinet/in.h>
+#include <inttypes.h>
+
+#define CS_SIZE 16
+#define CHUNKS 128
+
+#ifdef __LINUX__
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define htonll(x) __bswap_64 (x)
+#endif
+#endif
+
+/* TODO: add hardlink recognition */
+/* TODO: add xattr/acl */
+
+int gen_manifest = 0;
+int in_manifest = 0;
+char *checksum = NULL;
+FILE *out_fp;
+FILE *in_fp;
+
+enum _flags {
+   FLAG_UID,
+   FLAG_GID,
+   FLAG_MODE,
+   FLAG_ATIME,
+   FLAG_MTIME,
+   FLAG_CTIME,
+   FLAG_DATA,
+   NUM_FLAGS
+};
+
+const char flchar[] = ugoamcd;
+char line[65536];
+
+int flags[NUM_FLAGS] = { 1, 1, 1, 1, 1, 0, 1 };
+
+char *
+getln(char *buf, int size, FILE *fp)
+{
+   char *p;
+   int l;
+
+   p = fgets(buf, size, fp);
+   if (!p)
+   return NULL;
+
+   l = strlen(p);
+   while(l > 0 && (p[l - 1] == '\n' || p[l - 1] == '\r'))
+   p[--l] = 0;
+
+   return p;
+}
+
+void
+parse_flag(int c)
+{
+   int i;
+   int is_upper = 0;
+
+   if (c >= 'A' && c <= 'Z') {
+   is_upper = 1;
+   c += 'a' - 'A';
+   }
+   for (i = 0; flchar[i]; ++i) {
+   if (flchar[i] == c) {
+   flags[i] = is_upper ? 0 : 1;
+   return;
+   }
+   }
+   fprintf(stderr, unrecognized flag %c\n, c);
+   exit(-1);
+}
+
+void
+parse_flags(char *p)
+{
+   while (*p)
+   parse_flag(*p++);
+}
+
+void
+usage(void)
+{
+   fprintf(stderr, usage: fssum options path\n);
+   fprintf(stderr,   options:\n);
+   fprintf(stderr, -f  : write out a full manifest file\n);
+   fprintf(stderr, -w file   : send output to file\n);
+   fprintf(stderr,
+   -r file   : read checksum or manifest from file\n);
+   fprintf(stderr, -[ugoamcd]  : specify which fields to include in 
checksum calculation.\n);
+   fprintf(stderr,  u  : include uid\n);
+   fprintf(stderr,  g  : include gid\n);
+   fprintf(stderr,  o  : include mode\n);
+   fprintf(stderr,  m  : include mtime\n);
+   fprintf(stderr,  a  : include atime\n);
+   fprintf(stderr,  c  : include ctime\n);
+   fprintf(stderr,  d  : include file data\n);
+   fprintf(stderr, -[UGOAMCD]  : exclude respective field from 
calculation\n);
+   fprintf(stderr, -n  : reset all flags\n);
+   fprintf(stderr, -N  : set all flags\n);
+   fprintf(stderr, -h  : this help\n\n);
+   fprintf(stderr, The default field mask is ugoamCd. If the 
checksum/manifest is read from a\n);
+   fprintf(stderr, file, the mask is taken from there and the values 
given on the command line\n);
+   fprintf(stderr, are ignored.\n);
+   exit(-1

[PATCH] Btrfs: send correct rdev and mode in btrfs-send

2012-10-15 Thread Arne Jansen
When sending a device file, the stream was missing the mode. Also the
rdev was encoded wrongly.

Signed-off-by: Arne Jansen sensi...@gmx.net
---
 fs/btrfs/send.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index c7beb54..57933e8 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -2430,7 +2430,8 @@ verbose_printk(btrfs: send_create_inode %llu\n, ino);
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH_LINK, p);
} else if (S_ISCHR(mode) || S_ISBLK(mode) ||
   S_ISFIFO(mode) || S_ISSOCK(mode)) {
-   TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, rdev);
+   TLV_PUT_U64(sctx, BTRFS_SEND_A_RDEV, new_encode_dev(rdev));
+   TLV_PUT_U64(sctx, BTRFS_SEND_A_MODE, mode);
}
 
ret = send_cmd(sctx);
-- 
1.7.3.4

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Fits: tool to parse stream

2012-10-13 Thread Arne Jansen
On 10/12/12 15:32, Arne Jansen wrote:

 The idea of the btrfs send stream format was to generate it in a way that
 it is easy to receive on different platforms. Thus the proposed name FITS, for
 Filesystem Incremental Backup Stream. We should also build the tools to
 receive the stream on different platforms.

I meant to write 'Filesystem Incremental Transport Stream', but, as
Andrey Kuzmin pointed out, the name FITS is already taken. As the
'Backup' slipped in somehow, FIBS might be an alternative. Any
thoughts?

-Arne
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Fits: tool to parse stream

2012-10-12 Thread Arne Jansen
Simple tool to parse a fits-stream from stdout.

Signed-off-by: Arne Jansen sensi...@gmx.net
---

The idea of the btrfs send stream format was to generate it in a way that
it is easy to receive on different platforms. Thus the proposed name FITS, for
Filesystem Incremental Backup Stream. We should also build the tools to
receive the stream on different platforms.
As a place to collect those, I created a git repository at

git://git.kernel.org/pub/scm/linux/kernel/git/arne/fits-progs.git
---

 Makefile|4 +
 fitsparse.c |  402 +++
 2 files changed, 406 insertions(+), 0 deletions(-)
 create mode 100644 Makefile
 create mode 100644 fitsparse.c

diff --git a/Makefile b/Makefile
new file mode 100644
index 000..1c56476
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,4 @@
+all: fitsparse
+
+fitsparse: fitsparse.c
+   gcc -Wall fitsparse.c -o fitsparse
diff --git a/fitsparse.c b/fitsparse.c
new file mode 100644
index 000..3a52250
--- /dev/null
+++ b/fitsparse.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright (C) 2012 STRATO.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+
+int print_binary = 0;
+
+/*
+ * stream definitions taken from send.h, (C) Alexander Block
+ */
+#define BTRFS_SEND_STREAM_MAGIC btrfs-stream
+#define BTRFS_SEND_STREAM_VERSION 1
+#define BTRFS_SEND_BUF_SIZE (1024 * 64)
+
+struct btrfs_stream_header {
+   char magic[sizeof(BTRFS_SEND_STREAM_MAGIC)];
+   uint32_t version;
+} __attribute__ ((__packed__));
+
+struct btrfs_cmd_header {
+   /* len excluding the header */
+   uint32_t len;
+   uint16_t cmd;
+   /* crc including the header with zero crc field */
+   uint32_t crc;
+} __attribute__ ((__packed__));
+
+struct btrfs_tlv_header {
+   uint16_t tlv_type;
+   /* len excluding the header */
+   uint16_t tlv_len;
+} __attribute__ ((__packed__));
+
+char *cmd_names[] = {
+   UNSPEC,
+   SUBVOL,
+   SNAPSHOT,
+   MKFILE,
+   MKDIR,
+   MKNOD,
+   MKFIFO,
+   MKSOCK,
+   SYMLINK,
+   RENAME,
+   LINK,
+   UNLINK,
+   RMDIR,
+   SET_XATTR,
+   REMOVE_XATTR,
+   WRITE,
+   CLONE,
+   TRUNCATE,
+   CHMOD,
+   CHOWN,
+   UTIMES,
+   END
+};
+#define MAX_CMDS (sizeof(cmd_names) / sizeof(char *))
+
+#define TYPE_UNDEF 0
+#define TYPE_INT   1
+#define TYPE_STRING2
+#define TYPE_BINARY3
+#define TYPE_TIME  4
+#define TYPE_HEX   5
+#define TYPE_OCTAL 6
+#define TYPE_LEN_ONLY  7
+
+struct attr_types {
+   const char  *name;
+   int type;
+} attrs[] = {
+   { UNSPEC, TYPE_UNDEF },
+   { UUID, TYPE_BINARY },
+   { CTRANSID, TYPE_INT },
+   { INO, TYPE_INT },
+   { SIZE, TYPE_INT },
+   { MODE, TYPE_OCTAL },
+   { UID, TYPE_INT },
+   { GID, TYPE_INT },
+   { RDEV, TYPE_HEX },
+   { CTIME, TYPE_TIME },
+   { MTIME, TYPE_TIME },
+   { ATIME, TYPE_TIME },
+   { OTIME, TYPE_TIME },
+   { XATTR_NAME, TYPE_STRING },
+   { XATTR_DATA, TYPE_BINARY },
+   { PATH, TYPE_STRING },
+   { PATH_TO, TYPE_STRING },
+   { PATH_LINK, TYPE_STRING },
+   { FILE_OFFSET, TYPE_INT },
+   { DATA, TYPE_LEN_ONLY },
+   { CLONE_UUID, TYPE_BINARY },
+   { CLONE_CTRANSID, TYPE_INT },
+   { CLONE_PATH, TYPE_STRING },
+   { CLONE_OFFSET, TYPE_INT },
+   { CLONE_LEN, TYPE_INT }
+};
+#define MAX_ATTRS (sizeof(attrs) / sizeof(struct attr_types))
+
+uint16_t
+read16(const void *d)
+{
+   const uint8_t *data = d;
+
+   return data[0] + (1 << 8) * (uint16_t)data[1];
+}
+
+uint32_t
+read32(const void *d)
+{
+   const uint8_t *data = d;
+
+   return read16(data) + (1 << 16) * (uint32_t)read16(data + 2);
+}
+
+uint64_t
+read64(const void *d)
+{
+   const uint8_t *data = d;
+
+   return read32(data) + (1ull << 32) * (uint64_t)read32(data + 2);
+}
+
+/* 
+ * Extracted from the linux kernel source code, lib/libcrc32c.c.
+ * 
+ */
+static const uint32_t crc32c_table[256] = {
+   0xL, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L,
+   0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL,
+   0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL,
+   0x4D43CFD0L

Re: [PATCH] Fits: tool to parse stream

2012-10-12 Thread Arne Jansen
On 12.10.2012 17:19, Andrey Kuzmin wrote:
 Besides already occupied name discussed in a separate thread, I'd suggest to
 check if libcrc32 may be linked directly instead of copying. Formats for 
 various
 types may be also declared static instead of conditionals in the code.
 

The code is completely self contained. The idea was that it just builds on every
platform.

-Arne

 Regards,
 Andrey
 
 12.10.2012 17:33 пользователь Arne Jansen sensi...@gmx.net
 mailto:sensi...@gmx.net написал:
 
 Simple tool to parse a fits-stream from stdout.
 
 Signed-off-by: Arne Jansen sensi...@gmx.net mailto:sensi...@gmx.net
 ---
 
 The idea of the btrfs send stream format was to generate it in a way that
 it is easy to receive on different platforms. Thus the proposed name 
 FITS, for
 Filesystem Incremental Backup Stream. We should also build the tools to
 receive the stream on different platforms.
 As a place to collect those I created at git at
 
 git://git.kernel.org/pub/scm/linux/kernel/git/arne/fits-progs.git
 http://git.kernel.org/pub/scm/linux/kernel/git/arne/fits-progs.git
 ---
 
  Makefile|4 +
  fitsparse.c |  402 
 +++
  2 files changed, 406 insertions(+), 0 deletions(-)
  create mode 100644 Makefile
  create mode 100644 fitsparse.c
 
 diff --git a/Makefile b/Makefile
 new file mode 100644
 index 000..1c56476
 --- /dev/null
 +++ b/Makefile
 @@ -0,0 +1,4 @@
 +all: fitsparse
 +
 +fitsparse: fitsparse.c
 +   gcc -Wall fitsparse.c -o fitsparse
 diff --git a/fitsparse.c b/fitsparse.c
 new file mode 100644
 index 000..3a52250
 --- /dev/null
 +++ b/fitsparse.c
 @@ -0,0 +1,402 @@
 +/*
 + * Copyright (C) 2012 STRATO.  All rights reserved.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public
 + * License v2 as published by the Free Software Foundation.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public
 + * License along with this program; if not, write to the
 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 + * Boston, MA 021110-1307, USA.
 + */
 +
 +#include stdio.h
 +#include stdlib.h
 +#include stdint.h
 +#include string.h
 +#include errno.h
 +#include time.h
 +
 +int print_binary = 0;
 +
 +/*
 + * stream definitions taken from send.h, (C) Alexander Block
 + */
 +#define BTRFS_SEND_STREAM_MAGIC btrfs-stream
 +#define BTRFS_SEND_STREAM_VERSION 1
 +#define BTRFS_SEND_BUF_SIZE (1024 * 64)
 +
 +struct btrfs_stream_header {
 +   char magic[sizeof(BTRFS_SEND_STREAM_MAGIC)];
 +   uint32_t version;
 +} __attribute__ ((__packed__));
 +
 +struct btrfs_cmd_header {
 +   /* len excluding the header */
 +   uint32_t len;
 +   uint16_t cmd;
 +   /* crc including the header with zero crc field */
 +   uint32_t crc;
 +} __attribute__ ((__packed__));
 +
 +struct btrfs_tlv_header {
 +   uint16_t tlv_type;
 +   /* len excluding the header */
 +   uint16_t tlv_len;
 +} __attribute__ ((__packed__));
 +
 +char *cmd_names[] = {
 +   UNSPEC,
 +   SUBVOL,
 +   SNAPSHOT,
 +   MKFILE,
 +   MKDIR,
 +   MKNOD,
 +   MKFIFO,
 +   MKSOCK,
 +   SYMLINK,
 +   RENAME,
 +   LINK,
 +   UNLINK,
 +   RMDIR,
 +   SET_XATTR,
 +   REMOVE_XATTR,
 +   WRITE,
 +   CLONE,
 +   TRUNCATE,
 +   CHMOD,
 +   CHOWN,
 +   UTIMES,
 +   END
 +};
 +#define MAX_CMDS (sizeof(cmd_names) / sizeof(char *))
 +
 +#define TYPE_UNDEF 0
 +#define TYPE_INT   1
 +#define TYPE_STRING2
 +#define TYPE_BINARY3
 +#define TYPE_TIME  4
 +#define TYPE_HEX   5
 +#define TYPE_OCTAL 6
 +#define TYPE_LEN_ONLY  7
 +
 +struct attr_types {
 +   const char  *name;
 +   int type;
 +} attrs[] = {
 +   { UNSPEC, TYPE_UNDEF },
 +   { UUID, TYPE_BINARY },
 +   { CTRANSID, TYPE_INT },
 +   { INO, TYPE_INT },
 +   { SIZE, TYPE_INT },
 +   { MODE, TYPE_OCTAL },
 +   { UID, TYPE_INT },
 +   { GID, TYPE_INT },
 +   { RDEV, TYPE_HEX },
 +   { CTIME, TYPE_TIME },
 +   { MTIME, TYPE_TIME },
 +   { ATIME, TYPE_TIME

Re: working quota example?

2012-10-09 Thread Arne Jansen
On 09.10.2012 09:13, matthieu Barthélemy wrote:
 On Mon, Oct 8, 2012 at 9:51 PM, Arne Jansen sensi...@gmx.net wrote:
 On 10/08/12 21:31, matthieu Barthélemy wrote:


 Are there any plans to maybe get a better 'btrfs quota show' output?

 Definitely. The first priority was to get the kernel part running, when
 that is settled, we can improve the user mode part. There's also still
 some work to do to make the tracking qgroups more presentable.

 Yes, and it seems to run well, I confirm that I was able to set a
 quota on a test subvolume and have it trigger as expected.
 
 But now I'm stuck again, though maybe the problem is as obvious as the
 one that made me post first...
 After having created a file that triggered a quota exceeded error, I
 created a snapshot of my subvolume. No problem here.
 Then I tried to remove the original 'big' test file :
 
 rm: cannot remove `/btrfs/test/bigFile': Disk quota exceeded
 
 
 I then tried to delete the snapshot subvol to see if it helped:
 
 # ./btrfs sub delete /btrfs/test/.snap1/
 Delete subvolume '/btrfs/test/.snap1'
 # rm /btrfs/test/bigFile
 rm: cannot remove `/btrfs/roger/bigFile': Disk quota exceeded
 
 # ./btrfs qgroup show /btrfs/
 0/257 1073725440 4096
 0/261 1073725440 4096
 
 261 was the snap that I just removed. Why is it still there?
 

It may be that the corresponding qgroup does not get removed automatically
with the subvol. So it's not the subvol that's still there, just the qgroup.

 No problem, let's remove it:
 # ./btrfs qgroup destroy 0/261 /btrfs/
 
 # rm /btrfs/test/bigFile
 rm: cannot remove `/btrfs/test/bigFile': Disk quota exceeded

Do you have a limit on 257?

 
 # ls -lsha /btrfs/test/
 total 1.0G
0 drwxr-xr-x 1 root root   14 Oct  9 09:00 .
 4.0K drwxr-xr-x 1 root root   10 Oct  8 19:56 ..
 1.0G -rw-r--r-- 1 root root 1.0G Oct  8 19:58 bigFile
 
 
 
 I have to destroy my subvolume qgroup (0/257) to be able to 'rm' my
 file.  Is this the expected behavior?

In a way. You could just have raised the limit. The problem with cow
filesystems is that a delete actually takes space, even if it gets
freed afterwards when no snapshots are present. The quota code currently
has no special handling for 'rm', though it would obviously be useful.
It is already on the TODO list.

-Arne

 Of course I did something wrong again, but where?
 
 Thanks for your help,

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: working quota example?

2012-10-08 Thread Arne Jansen
On 10/08/12 21:31, matthieu Barthélemy wrote:

 
 Are there any plans to maybe get a better 'btrfs quota show' output?

Definitely. The first priority was to get the kernel part running, when
that is settled, we can improve the user mode part. There's also still
some work to do to make the tracking qgroups more presentable.

 Maybe with more details, maybe a simple ' 1 subvolume + all its
 snapshots'  accounting.

Well, there's no such thing as 'the snapshot of a subvolume', because in
btrfs each snapshot is instantly a subvolume in its own right. Btrfs
doesn't really keep track of which snapshot is a descendant of which.

 Maybe I missed something, and I admit I didn't read all the btrfs-progs
 patches related to qgroups, but there doesn't seem to have an option to
 show a subvolume's quota limit (not the referenced/exclusive usage
 counters). Am I right?

Probably, I don't remember :) That should be easy to fix anyway.

 
 Thanks again Arne for your help!
 
 
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs receive to subdirectory

2012-10-08 Thread Arne Jansen
On 10/08/12 18:30, Rory Campbell-Lange wrote:
 I can send snapshots to volume, but not volume/dir. Please advise
 if what I am doing is incorrect.
 
 Rory
 
 Format usb3 disk and mount
   root@orchard:/bkp# mkfs.btrfs /dev/sdb1
WARNING! - Btrfs v0.20-rc1-37-g91d9eec IS EXPERIMENTAL
WARNING! - see http://btrfs.wiki.kernel.org before using
fs created label (null) on /dev/sdb1
  nodesize 4096 leafsize 4096 sectorsize 4096 size 698.64GB
Btrfs v0.20-rc1-37-g91d9eec
   mount /dev/sdb1 /mnt
 
 
 Create snapshots on /bkp share
   root@orchard:/bkp# btrfs subvolume snapshot -r subvol 
 snaps/snap_081012_1715
Create a readonly snapshot of 'subvol' in 'snaps/snap_081012_1715'
   root@orchard:/bkp# mutt -f subvol/INBOX/
  1561 kept, 18 deleted.
   root@orchard:/bkp# btrfs subvolume snapshot -r subvol 
 snaps/snap_081012_1716
  Create a readonly snapshot of 'subvol' in 'snaps/snap_081012_1716'
 
 Send base backup to /mnt
   root@orchard:/bkp# btrfs send snaps/snap_081012_1715 | btrfs receive 
 /mnt
At subvol snaps/snap_081012_1715
At subvol snap_081012_1715
 
 Send incremental backup to /mnt
   root@orchard:/bkp# btrfs send -p snaps/snap_081012_1715 \
snaps/snap_081012_1716 | btrfs receive /mnt
At subvol snaps/snap_081012_1716
At snapshot snap_081012_1716
 
   root@orchard:/bkp# ls /mnt
   snap_081012_1715  snap_081012_1716
 
 Results:
   root@orchard:/bkp# btrfs subvolume list /bkp 
ID 259 gen 62 top level 5 path subvol
ID 278 gen 60 top level 5 path snaps/snap_081012_1715
ID 279 gen 62 top level 5 path snaps/snap_081012_1716
   root@orchard:/bkp# btrfs subvolume list /mnt 
ID 256 gen 8 top level 5 path snap_081012_1715
ID 259 gen 9 top level 5 path snap_081012_1716
 
 Restart:
   root@orchard:/bkp# btrfs subvolume del /mnt/snap_081012_171*
Delete subvolume '/mnt/snap_081012_1715'
Delete subvolume '/mnt/snap_081012_1716'
 
 Try and snap to /mnt/subdir
   root@orchard:/bkp# mkdir /mnt/snaps
   root@orchard:/bkp# btrfs send snaps/snap_081012_1715 | btrfs receive 
 /mnt/snaps
At subvol snaps/snap_081012_1715
At subvol snap_081012_1715
   root@orchard:/bkp# btrfs send -p snaps/snap_081012_1715 \
  snaps/snap_081012_1716 | btrfs receive /mnt/snaps
At subvol snaps/snap_081012_1716
At snapshot snap_081012_1716
ERROR: open snaps/snap_081012_1715 failed. No such file or directory
 root@orchard:/bkp# ls /mnt/snaps
  snap_081012_1715

The target has to be a subvol also. But interestingly enough, it also
fails for a subvol. The base send works, the incremental fails, because
btrfs receive can't find snaps/snap_081012_1715. If you give /mnt/snaps
as the target for the base and just /mnt for the incremental, it works.
There's clearly something broken there...

-arne

 
 
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs progs: quota groups support

2012-10-04 Thread Arne Jansen
On 29.09.2012 23:48, Diego Calleja wrote:
 On Viernes, 10 de agosto de 2012 15:51:07 Jan Schmidt escribió:
 From: Arne Jansen sensi...@gmx.net

 Signed-off-by: Jan Schmidt list.bt...@jan-o-sch.net
 Signed-off-by: Arne Jansen sensi...@gmx.net
 ---
 This is the rebased version of Arne's qgroup patch set. He's the
 original author, which is why I'm sending with his author tag.
 
 A small suggestion, it would be nice to have a man page update.

I thought Goffredo had already compiled a man page, with his
patch

[PATCH 6/7] qgroup/quota info for man page

If it hasn't been merged it might be time to do so.

-Arne


 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs ulist use rbtree instead

2012-10-04 Thread Arne Jansen
On 04.10.2012 11:26, David Sterba wrote:
 @@ -207,16 +266,23 @@ EXPORT_SYMBOL(ulist_add);
   * end is reached. No guarantee is made with respect to the order in which
   * the elements are returned. They might neither be returned in order of
   * addition nor in ascending order.
 - * It is allowed to call ulist_add during an enumeration. Newly added items
 - * are guaranteed to show up in the running enumeration.
   */
  struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_iterator 
 *uiter)
 
 Quick observation:
 
 If there's code relying on the behaviour stated in the removed part of
 the comment, it will break. Have you verified this is not the case?

It's a good thing to use rb-trees when the small inline cache is exhausted,
but of course it should keep the semantics. We heavily rely on the removed
part.
It should be possible to keep the semantics if the elements are also kept
in a linked list. As it inflates the size of struct ulist_node even more,
it might make sense to use a smaller struct for the inline cache to keep
the footprint low.

Also, a commit message might be good that explains the motivation for the
change. Have you done any measurements?

Thanks for working on this.

-Arne

 
 
 david
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: try to avoid doing a search in btrfs_next_leaf

2012-10-02 Thread Arne Jansen
On 02.10.2012 17:27, Josef Bacik wrote:
 On Tue, Oct 02, 2012 at 09:05:43AM -0600, David Sterba wrote:
 On Tue, Oct 02, 2012 at 10:32:32AM -0400, Josef Bacik wrote:
 On Tue, Oct 02, 2012 at 08:25:38AM -0600, David Sterba wrote:
 Hi,

 On Mon, Sep 24, 2012 at 04:02:59PM -0400, Josef Bacik wrote:
 --- a/fs/btrfs/ctree.h
 +++ b/fs/btrfs/ctree.h
 @@ -571,6 +571,7 @@ struct btrfs_path {
   unsigned int skip_locking:1;
   unsigned int leave_spinning:1;
   unsigned int search_commit_root:1;
 + unsigned int shecantgoanyfarthercapt:1;

 so you did not make it to LWN's quote of the week, can you please rename
 it to something sensible?


 I don't see what's unsensible about it.

 Itsfunnybuthardtoreadanddoesnothelpunderstandingthecodewhereitsused.

 
 Idisagreeithinkithelpstremendouslywiththeunderstandingofthecode,youcantgoanyfarther,butfineineedotaddacommenttobtrfs_next_leafanywaysoi'llnameitsomethingelse,anysuggestions?
 

wecantgoanyfarther

 Josef
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: Add code to support file creation time.

2012-09-30 Thread Arne Jansen
On 07/04/12 13:04, Alexander Block wrote:
 On Wed, Jul 4, 2012 at 9:56 AM, Li Zefan lize...@huawei.com wrote:
 On 2012/7/4 15:18, chandan r wrote:

 This patch adds a new member to the 'struct btrfs_inode' structure to hold
 the file creation time.



 Well, how do users use this file creation time? There's no syscall and 
 there's
 no ioctl that exports this information. That xstat syscall hasn't been 
 accepted,
 so you can revise and repost the patch when you see it happens.
 In my opinion we should still include this patch. Currently, otime is never 
 even
 initialized, having undefined values. If it ever gets possible to
 access otime, we
 would at least have some inodes with valid otime fields.

I'll second that, even if by now the fields get correctly initialized.
Why should we zero the fields instead of setting them to the correct
values?

-Arne

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: do not async metadata csums if we have hardware crc32c

2012-09-24 Thread Arne Jansen
On 09/24/12 20:11, Josef Bacik wrote:
 The reason we offload csumming is because it is CPU intensive, except it is
 not on modern intel CPUs.  So check to see if we support hardware crc32c,
 and if we do just do the csumming in our current threads context.  Otherwise
 we can farm it off.  Thanks,
 
 Signed-off-by: Josef Bacik jba...@fusionio.com
 ---
  fs/btrfs/disk-io.c |   17 +
  1 files changed, 17 insertions(+), 0 deletions(-)
 
 diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
 index dcaf556..830b9af 100644
 --- a/fs/btrfs/disk-io.c
 +++ b/fs/btrfs/disk-io.c
 @@ -31,6 +31,7 @@
  #include linux/migrate.h
  #include linux/ratelimit.h
  #include asm/unaligned.h
 +#include asm/cpufeature.h
  #include compat.h
  #include ctree.h
  #include disk-io.h
 @@ -880,6 +881,22 @@ static int btree_submit_bio_hook(struct inode *inode, 
 int rw, struct bio *bio,
   }
  
   /*
 +  * Pretty sure I'm going to hell for this.  If our CPU can do crc32cs in
 +  * the hardware then there is no reason to do the csum stuff
 +  * asynchronously, it will be faster to do it inline, so test to see if
 +  * our CPU can do hardware crc32c and if it can just do the csum in our
 +  * threads context.
 +  */
 +#ifdef CONFIG_X86
 + if (cpu_has_xmm4_2) {
 + printk(KERN_ERR doing it the fast way\n);

You'll probably go to hell for the printk...

 + ret = btree_csum_one_bio(bio);
 + if (ret)
 + return ret;
 + return btrfs_map_bio(BTRFS_I(inode)-root, rw, bio, mirror_num, 
 0);
 + }
 +#endif
 + /*
* kthread helpers are used to submit writes so that checksumming
* can happen in parallel across all CPUs
*/
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Identifying reflink / CoW files

2012-09-22 Thread Arne Jansen
On 09/22/12 05:38, Jp Wise wrote:
 Good morning, I'm working on an offline deduplication script intended to
 work around the copy-on-write functionality of BTRFS.
 
 Simply put - is there any existing utility to compare two files (or
 dirs) and output if the files share the same physical extents / data
 blocks on disk?
 - aka - they're CoW copies.
 
 I'm not actively working with BTRFS yet, but for the project I'm working
 on it's looking to be the most suitable candidate, and the CoW
 functionality avoids issues with file changes that hardlinks would create.
 From reading other posts, aware the information could be pulled out via
 btrfs-debug-tree, but it would then involve parsing the entire output to
 locate the required files inodes and their extents which seems like
 quite a roundabout way to retrieve the information.
 
 Also my programming skills aren't  up to the task of trying to pull the
 tree data directly from the filesystem to do it, and I'd like to avoid
 doing byte-by-byte comparisons on all files as it's inefficient if the
 file can instead be identified as a CoW copy.

The information is available in the kernel, but to find a good way to
extract it you have to describe in much more detail what you intend to
do. What I don't understand, first of all, is why you need the
information about already shared (=deduped) blocks to build a dedup tool.
Don't you want to find data that is identical, but not shared, instead?

 
 Open to suggestions of other tools that could be used to achieve the
 desired result.
 

AFAIK (I have not played with it myself), fiemap can give you information
about the mappings of each file. If the mappings of 2 files match,
the data is shared.

 Thanks.
 Jp.
 -- 
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Cannot create subvolume with quota enabled

2012-09-11 Thread Arne Jansen
On 11.09.2012 07:24, Marios Titas wrote:
 On Fri, Sep 7, 2012 at 6:04 PM, Arne Jansen sensi...@gmx.net wrote:
 Hi Andreas,


 Thanks for giving quota a try. I sent a fix separately with
 the subject

 [PATCH] Btrfs: btrfs_qgroup_inherit wrongly returns an error

 Could you please see if it fixes the problem?
 
 Even with this patch I still have a similar problem: When I try to
 create a snapshot of an old subvolume that was created before I
 enabled quotas, it fails. For new subvolumes it works just fine.
 Here's an example output:
 
 virtual ~ # btrfs sub snap /mnt/butter/old /mnt/butter/old-snap
 Create a snapshot of '/mnt/butter/old' in '/mnt/butter/old-snap'
 ERROR: cannot snapshot '/mnt/butter/old' - Invalid argument

Instead of applying my patch, could you please just revert

commit 5986802c2fcc754040bb7ed95f30bb16c4a843b7
Author: Dan Carpenter dan.carpen...@oracle.com
Date:   Mon Jul 30 02:16:10 2012 -0600

Btrfs: fix some error codes in btrfs_qgroup_inherit()

It introduces both faulty return codes.

Thanks,
Arne
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Revert "Btrfs: fix some error codes in btrfs_qgroup_inherit()"

2012-09-11 Thread Arne Jansen
This reverts commit 5986802c2fcc754040bb7ed95f30bb16c4a843b7.

Both paths are not error paths but regular cases where non-qgroup
subvols are involved.

Signed-off-by: Arne Jansen sensi...@gmx.net
---
 fs/btrfs/qgroup.c |8 ++--
 1 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 38b42e7..b650155 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1371,10 +1371,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle 
*trans,
 
if (srcid) {
srcgroup = find_qgroup_rb(fs_info, srcid);
-   if (!srcgroup) {
-   ret = -EINVAL;
+   if (!srcgroup)
goto unlock;
-   }
dstgroup-rfer = srcgroup-rfer - level_size;
dstgroup-rfer_cmpr = srcgroup-rfer_cmpr - level_size;
srcgroup-excl = level_size;
@@ -1383,10 +1381,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle 
*trans,
qgroup_dirty(fs_info, srcgroup);
}
 
-   if (!inherit) {
-   ret = -EINVAL;
+   if (!inherit)
goto unlock;
-   }
 
i_qgroups = (u64 *)(inherit + 1);
for (i = 0; i  inherit-num_qgroups; ++i) {
-- 
1.7.3.4

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Workaround for hardlink count problem?

2012-09-11 Thread Arne Jansen
On 11.09.2012 01:38, Jan Engelhardt wrote:
 
 On Tuesday 2012-09-11 01:09, Martin Steigerwald wrote:
 What about:

 - copy first backup version
 - btrfs subvol create first next
 - copy next backup version
 - btrfs subvol create previous next

 Wouldn't btrfs subvolume snapshot, plus rsync --inplace be more
 useful here? That is, if the original hardlink is caused by multiple
 versions of backup of the same file.

 Sure, I meant subvol snapshot in above example. Thanks for noticing.

 But I do not use --inplace as it conflicts with some other rsync options I 
 like to use:
 
 It is a tradeoff.
 
 rsync --inplace leads to fragmentation which is detrimental for the
 speed of reads (and read-write-cycles as used by rsync) of big files
 (multi-GB) that are regularly updated, but it is probably even worse
 for smaller-than-GB files because percent-wise, they are even more
 fragmented.
 
 $ filefrag */vm/intranet.dsk
 snap-2012-08-15/vm/intranet.dsk: 23 extents found
 snap-2012-08-16/vm/intranet.dsk: 23 extents found
 snap-2012-08-17/vm/intranet.dsk: 4602 extents found
 snap-2012-08-18/vm/intranet.dsk: 6221 extents found
 snap-2012-08-19/vm/intranet.dsk: 6604 extents found
 snap-2012-08-20/vm/intranet.dsk: 6694 extents found
 snap-2012-08-21/vm/intranet.dsk: 6650 extents found
 snap-2012-08-22/vm/intranet.dsk: 6760 extents found
 snap-2012-08-23/vm/intranet.dsk: 7226 extents found
 snap-2012-08-24/vm/intranet.dsk: 7159 extents found
 snap-2012-08-25/vm/intranet.dsk: 7464 extents found
 snap-2012-08-26/vm/intranet.dsk: 7746 extents found
 snap-2012-08-27/vm/intranet.dsk: 8017 extents found
 snap-2012-08-28/vm/intranet.dsk: 8145 extents found
 snap-2012-08-29/vm/intranet.dsk: 8393 extents found
 snap-2012-08-30/vm/intranet.dsk: 8474 extents found
 snap-2012-08-31/vm/intranet.dsk: 9150 extents found
 snap-2012-09-01/vm/intranet.dsk: 8900 extents found
 snap-2012-09-02/vm/intranet.dsk: 9218 extents found
 snap-2012-09-03/vm/intranet.dsk: 9575 extents found
 snap-2012-09-04/vm/intranet.dsk: 9760 extents found
 snap-2012-09-05/vm/intranet.dsk: 9839 extents found
 snap-2012-09-06/vm/intranet.dsk: 9907 extents found
 snap-2012-09-07/vm/intranet.dsk: 10006 extents found
 snap-2012-09-08/vm/intranet.dsk: 10248 extents found
 snap-2012-09-09/vm/intranet.dsk: 10488 extents found
 
 Without --inplace (prerequisite to use -S) however, it will recreate
 a file if it has been touched. While this easily avoids fragmentation
 (since it won't share any data blocks with the old one), it can take
 up more space with the big files.
 
 -ax --acls --xattrs --sparse --hard-links --del --delete-excluded --
 
 I knew short options would be helpful here: -axAXSH
 (why don't they just become the standard... they are in like almost
 every other rsync invocation I ever had)
 
   -S, --sparse
  Try to handle sparse files efficiently so they  take  up
  less space on the destination.  Conflicts with --inplace
  because it’s not possible to overwrite data in a  sparse
  fashion.
 
 Oh and if anybody from the rsync camp reads it: with hole-punching
 now supported in Linux, there is no reason not to support -S with
 --inplace, I think.

I sent a patch for this quite some time ago:

https://bugzilla.samba.org/show_bug.cgi?id=7194

Feel free to push it :)

-Arne

 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Cannot create subvolume with quota enabled

2012-09-10 Thread Arne Jansen
On 09/10/2012 08:13 PM, Andreas Philipp wrote:
 Hi Arne,
 
 On 08.09.2012 00:04, Arne Jansen wrote:
 Hi Andreas,

 On 09/07/2012 09:36 PM, Andreas Philipp wrote:
 Hi,

 The following steps reproduce the error. My kernel is 3.6-rc4 and 
 btrfs-progs are at commit 89fe5b5f666c247aa3173745fb87c710f3a71a4a
 from 
 git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-progs.git


 master.
 thor ~ # mkfs.btrfs -L test /dev/vg1/test

 WARNING! - Btrfs Btrfs v0.19 IS EXPERIMENTAL WARNING! - see
 http://btrfs.wiki.kernel.org before using

 fs created label test on /dev/vg1/test nodesize 4096 leafsize 4096
 sectorsize 4096 size 20.00GB Btrfs Btrfs v0.19 thor ~ # mount
 /dev/vg1/test /mnt/tmp thor ~ # btrfs quota enable /mnt/tmp thor ~
 # btrfs subvolume create /mnt/tmp/test Create subvolume
 '/mnt/tmp/test' ERROR: cannot create subvolume - Invalid argument
 Thanks for giving quota a try. I sent a fix separately with
 the subject

 [PATCH] Btrfs: btrfs_qgroup_inherit wrongly returns an error

 Could you please see if it fixes the problem?
 With the patch applied (on top of either 3.6-rc4 or 3.6-rc5) I can
 create subvolumes as you see below.
 
 root@debian:~# mkfs.btrfs /dev/sdb
 
 WARNING! - Btrfs Btrfs v0.19 IS EXPERIMENTAL
 WARNING! - see http://btrfs.wiki.kernel.org before using
 
 fs created label (null) on /dev/sdb
 nodesize 4096 leafsize 4096 sectorsize 4096 size 3.00GB
 Btrfs Btrfs v0.19
 root@debian:~# mount /dev/sdb /mnt/test
 root@debian:~# btrfs quota enable /mnt/test
 root@debian:~# btrfs subvolume create /mnt/test/subvolume
 Create subvolume '/mnt/test/subvolume'
 root@debian:~# btrfs qgroup show /mnt/test
 0/257 4096 4096
 root@debian:~# dd if=/dev/urandom of=/mnt/test/subvolume/testfile
 bs=1024k count=25
 25+0 records in
 25+0 records out
 26214400 bytes (26 MB) copied, 2.95321 s, 8.9 MB/s
 root@debian:~# btrfs qgroup show /mnt/test
 0/257 4096 4096
 root@debian:~# du -hs /mnt/test/*
 25M /mnt/test/subvolume
 
 At least I expected that the output of ' btrfs qgroup show' changes
 after some data got written to a subvolume which is assigned to a
 qgroup. (Hope, I got it right.)

Due to delayed-*, the accounting sometimes also shows up delayed.
After a sync you should see the changes.

 
 Thanks,
 Andreas
 

 Thanks,
 Arne
 Please, do not hesitate to contact me for any further information
 etc.

 Thanks, Andreas -- To unsubscribe from this list: send the line
 unsubscribe linux-btrfs in the body of a message to
 majord...@vger.kernel.org More majordomo info at
 http://vger.kernel.org/majordomo-info.html

 
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: btrfs_qgroup_inherit wrongly returns an error

2012-09-07 Thread Arne Jansen
When using the V1-version of the snap/subvol creation ioctl
btrfs_qgroup_inherit wrongly returns an error because no inherit parameter
is given. Fix the return value.

Signed-off-by: Arne Jansen sensi...@gmx.net
---
 fs/btrfs/qgroup.c |4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 38b42e7..090405d 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1383,10 +1383,8 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle 
*trans,
qgroup_dirty(fs_info, srcgroup);
}
 
-   if (!inherit) {
-   ret = -EINVAL;
+   if (!inherit)
goto unlock;
-   }
 
i_qgroups = (u64 *)(inherit + 1);
for (i = 0; i  inherit-num_qgroups; ++i) {
-- 
1.7.3.4

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Cannot create subvolume with quota enabled

2012-09-07 Thread Arne Jansen
Hi Andreas,

On 09/07/2012 09:36 PM, Andreas Philipp wrote:
 Hi,
 
 The following steps reproduce the error. My kernel is 3.6-rc4 and 
 btrfs-progs are at commit 89fe5b5f666c247aa3173745fb87c710f3a71a4a
 from 
 git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-progs.git

 
master.
 
 thor ~ # mkfs.btrfs -L test /dev/vg1/test
 
 WARNING! - Btrfs Btrfs v0.19 IS EXPERIMENTAL WARNING! - see
 http://btrfs.wiki.kernel.org before using
 
 fs created label test on /dev/vg1/test nodesize 4096 leafsize 4096
 sectorsize 4096 size 20.00GB Btrfs Btrfs v0.19 thor ~ # mount
 /dev/vg1/test /mnt/tmp thor ~ # btrfs quota enable /mnt/tmp thor ~
 # btrfs subvolume create /mnt/tmp/test Create subvolume
 '/mnt/tmp/test' ERROR: cannot create subvolume - Invalid argument

Thanks for giving quota a try. I sent a fix separately with
the subject

[PATCH] Btrfs: btrfs_qgroup_inherit wrongly returns an error

Could you please see if it fixes the problem?

Thanks,
Arne
 
 Please, do not hesitate to contact me for any further information
 etc.
 
 Thanks, Andreas -- To unsubscribe from this list: send the line
 unsubscribe linux-btrfs in the body of a message to
 majord...@vger.kernel.org More majordomo info at
 http://vger.kernel.org/majordomo-info.html
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: add malloc check in transaction.h/btrfs_start_transaction

2012-08-16 Thread Arne Jansen
On 16.08.2012 09:57, Wang Sheng-Hui wrote:
 Since malloc may fail, we should check its result before assigning
 values to the fields of struct btrfs_trans_handle *h.
 
 Signed-off-by: Wang Sheng-Hui shh...@gmail.com
 ---
  transaction.h |5 +
  1 files changed, 5 insertions(+), 0 deletions(-)
 
 diff --git a/transaction.h b/transaction.h
 index a1070e0..d4e42a1 100644
 --- a/transaction.h
 +++ b/transaction.h
 @@ -32,7 +32,12 @@ static inline struct btrfs_trans_handle *
  btrfs_start_transaction(struct btrfs_root *root, int num_blocks)
  {
   struct btrfs_fs_info *fs_info = root-fs_info;
 +
   struct btrfs_trans_handle *h = malloc(sizeof(*h));
 + if (!h) {
 + BUG();
 + return NULL;
 + }

A simpler way would be to write
BUG_ON(!h);

  
   BUG_ON(root-commit_root);
   BUG_ON(fs_info-running_transaction);

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: trivial code style fix in root-tree.c

2012-08-16 Thread Arne Jansen
On 16.08.2012 15:17, Wang Sheng-Hui wrote:
 Add code indent to the bad styled statements.
 
 Signed-off-by: Wang Sheng-Hui shh...@gmail.com
 ---
  root-tree.c |   10 --
  1 files changed, 4 insertions(+), 6 deletions(-)
 
 diff --git a/root-tree.c b/root-tree.c
 index 782472c..39cfef6 100644
 --- a/root-tree.c
 +++ b/root-tree.c
 @@ -182,12 +182,10 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, 
 struct btrfs_root *root,
   if (ret  0)
   goto out;
   if (ret) {
 -btrfs_print_leaf(root, path-nodes[0]);
 -printk(failed to del %llu %u %llu\n,
 - (unsigned long long)key-objectid,
 - key-type,
 - (unsigned long long)key-offset);
 -

I guess Alexander intentionally put in the bad indentation
to remind him to remove this code before submitting the
patch :)

 + btrfs_print_leaf(root, path-nodes[0]);
 + printk(failed to del %llu %u %llu\n,
 + (unsigned long long)key-objectid, key-type,
 + (unsigned long long)key-offset);
   }
   BUG_ON(ret != 0);
   leaf = path-nodes[0];

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] btrfs-progs: trivial code style fix in root-tree.c

2012-08-16 Thread Arne Jansen
On 16.08.2012 15:26, Wang Sheng-Hui wrote:
 On 2012年08月16日 21:23, Arne Jansen wrote:
 On 16.08.2012 15:17, Wang Sheng-Hui wrote:
 Add code indent to the bad styled statements.

 Signed-off-by: Wang Sheng-Hui shh...@gmail.com
 ---
  root-tree.c |   10 --
  1 files changed, 4 insertions(+), 6 deletions(-)

 diff --git a/root-tree.c b/root-tree.c
 index 782472c..39cfef6 100644
 --- a/root-tree.c
 +++ b/root-tree.c
 @@ -182,12 +182,10 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, 
 struct btrfs_root *root,
 if (ret  0)
 goto out;
 if (ret) {
 -btrfs_print_leaf(root, path-nodes[0]);
 -printk(failed to del %llu %u %llu\n,
 -   (unsigned long long)key-objectid,
 -   key-type,
 -   (unsigned long long)key-offset);
 -

 I guess Alexander intentionally put in the bad indentation
 to remind him to remove this code before submitting the
 patch :)

 +   btrfs_print_leaf(root, path-nodes[0]);
 +   printk(failed to del %llu %u %llu\n,
 +   (unsigned long long)key-objectid, key-type,
 +   (unsigned long long)key-offset);
 }
 
 When I saw the BUG_ON, I wondered whether we still need the above branch statement.
 It looks like it is just helpful for debugging - it gives out some info, right?

Yes. So either we delete it completely or we move the BUG_ON
into the block as a BUG_ON(1) and add a "Btrfs: " prefix to the
printk.
On the other hand, I just saw this is progs, so I think it can
just be deleted.

 
 BUG_ON(ret != 0);
 leaf = path-nodes[0];

 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Btrfs: fix race in run_clustered_refs

2012-08-09 Thread Arne Jansen
With commit

commit d1270cd91f308c9d22b2804720c36ccd32dbc35e
Author: Arne Jansen sensi...@gmx.net
Date:   Tue Sep 13 15:16:43 2011 +0200

 Btrfs: put back delayed refs that are too new

I added a window where the delayed_ref's head->ref_mod code can diverge
from the sum of the remaining refs, because we release the head->mutex
in the middle. This leads to btrfs_lookup_extent_info returning wrong
numbers. This patch fixes this by adjusting the head's ref_mod with each
delayed ref we run.

Signed-off-by: Arne Jansen sensi...@gmx.net
---

Changes v1-v2:
   Changed commit message. As Josef pointed out the race is new and has
   not been present before d1270cd9. The code is unchanged.

---
 fs/btrfs/extent-tree.c |   17 +
 1 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e66dc9a..60d175a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2318,6 +2318,23 @@ static noinline int run_clustered_refs(struct 
btrfs_trans_handle *trans,
ref-in_tree = 0;
rb_erase(ref-rb_node, delayed_refs-root);
delayed_refs-num_entries--;
+   if (locked_ref) {
+   /*
+* when we play the delayed ref, also correct the
+* ref_mod on head
+*/
+   switch (ref-action) {
+   case BTRFS_ADD_DELAYED_REF:
+   case BTRFS_ADD_DELAYED_EXTENT:
+   locked_ref-node.ref_mod -= ref-ref_mod;
+   break;
+   case BTRFS_DROP_DELAYED_REF:
+   locked_ref-node.ref_mod += ref-ref_mod;
+   break;
+   default:
+   WARN_ON(1);
+   }
+   }
spin_unlock(delayed_refs-lock);
 
ret = run_one_delayed_ref(trans, root, ref, extent_op,
-- 
1.7.7.3

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs panic in 3.5.0

2012-08-09 Thread Arne Jansen
On 09.08.2012 04:52, Marc MERLIN wrote:
 On Tue, Aug 07, 2012 at 11:47:36AM -0700, Marc MERLIN wrote:
 On Tue, Aug 07, 2012 at 08:14:23PM +0200, Arne Jansen wrote:
 On 08/07/2012 07:40 PM, Marc MERLIN wrote:
 Unfortunately I only have a screenshot.

 Apparently the panic was in 
 btrfs_set_lock_blocking_rw
 with a RIP in btrfs_cow_block

 Can you please resolve btrfs_cow_block+0x3b to a line number?

 gdb btrfs.ko
 (gdb) info line *btrfs_cow_block+0x3b

 So, I'm not very good at this, sorry if I'm doing it wrong:
 gandalfthegreat:~# gdb 
 /lib/modules/3.5.0-amd64-preempt-noide-20120410/kernel/fs/btrfs/btrfs.ko
 Reading symbols from 
 /lib/modules/3.5.0-amd64-preempt-noide-20120410/kernel/fs/btrfs/btrfs.ko...(no
  debugging symbols found)...done.
 (gdb) info line *btrfs_cow_block+0x3b
 No line number information available for address 0x9a6e

 Mmmh, it seems that I'm missing a kernel option that adds symbols in modules?

 I can add it for my next kernel compile. Do you have the config option name
 off hand?

 I put my module here if that helps:
 http://marc.merlins.org/tmp/btrfs.ko
 
 I felt bad for having a kernel without debug symbols it seems, so I looked
 at my kernel config and I do have:
 CONFIG_DEBUG_BUGVERBOSE=y
 CONFIG_DEBUG_INFO=y
 # CONFIG_DEBUG_INFO_REDUCED is not set
 
 Any idea what else I'm missing to provide better debug info if I have a
 problem again?
 
 And is it reasonably easy to take the .ko apparently without line numbers,
 like the one I gave you, and infer the line of code for a function offset?

The .ko is fine. It crashes here:

noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *parent, int parent_slot,
struct extent_buffer **cow_ret)
{
u64 search_start;
int ret;

if (trans-transaction != root-fs_info-running_transaction) {
printk(KERN_CRIT trans %llu running %llu\n,
   (unsigned long long)trans-transid,
   (unsigned long long)
   root-fs_info-running_transaction-transid);
  ^^

WARN_ON(1);
}

fs_info->running_transaction is probably NULL.


 
 Thanks,
 Marc

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: fix race in run_clustered_refs

2012-08-08 Thread Arne Jansen
run_clustered_refs runs all delayed refs for one head one by one. During
the runs, the delayed_refs->lock is released. In this window, the ref_mod
from the head does not match the sum of all refs below the head. When
btrfs_lookup_extent_info is run in this window, it gives inconsistent
results.
The qgroups patch added code to put delayed refs back, thus opening this
window very wide.
This patch ensures that head->ref_mod always matches the queued refs, but
a window still remains where on-disk refs + delayed_refs miss the ref
currently being run.

Signed-off-by: Arne Jansen sensi...@gmx.net
---
 fs/btrfs/extent-tree.c |   17 +
 1 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e66dc9a..60d175a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2318,6 +2318,23 @@ static noinline int run_clustered_refs(struct 
btrfs_trans_handle *trans,
ref-in_tree = 0;
rb_erase(ref-rb_node, delayed_refs-root);
delayed_refs-num_entries--;
+   if (locked_ref) {
+   /*
+* when we play the delayed ref, also correct the
+* ref_mod on head
+*/
+   switch (ref-action) {
+   case BTRFS_ADD_DELAYED_REF:
+   case BTRFS_ADD_DELAYED_EXTENT:
+   locked_ref-node.ref_mod -= ref-ref_mod;
+   break;
+   case BTRFS_DROP_DELAYED_REF:
+   locked_ref-node.ref_mod += ref-ref_mod;
+   break;
+   default:
+   WARN_ON(1);
+   }
+   }
spin_unlock(delayed_refs-lock);
 
ret = run_one_delayed_ref(trans, root, ref, extent_op,
-- 
1.7.7.3

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs panic in 3.5.0

2012-08-07 Thread Arne Jansen
On 08/07/2012 07:40 PM, Marc MERLIN wrote:
 Unfortunately I only have a screenshot.
 
 Apparently the panic was in 
 btrfs_set_lock_blocking_rw
 with a RIP in btrfs_cow_block
 

Can you please resolve btrfs_cow_block+0x3b to a line number?

gdb btrfs.ko
(gdb) info line *btrfs_cow_block+0x3b

Thanks,
Arne

 Screenshot here:
 http://marc.merlins.org/tmp/btrfs_oops.jpg
 
 Because the display looks a bit messed up, I can't tell if the ata error
 happened before or after the oops.
 
 System rebooted ok.
 
 Was there a better way to get this oops if I didn't have a serial console?
 
 Marc
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: wait_for_more_refs not waking up

2012-08-06 Thread Arne Jansen
On 08/06/2012 10:10 PM, Chris Mason wrote:
 On Fri, Aug 03, 2012 at 01:11:31AM -0600, Alexander Block wrote:
 I got a deadlock happening while creating a snapshot and ordered
 writes are pending. I can reproduce it 100% using btrfs send/receive.
 It happens when I receive a set of streams on a freshly created FS. I
 think I can exclude send/receive as the source of the problem as it
 happens while receiving, even when rebooting between send and receive.
 Receive does not depend on any extra kernel support and only uses
 normal vfs syscalls.
 
 Are you seeing this one on the git tree I sent to Linus?  I triggered
 this once and then haven't seen it since I put in the barrier before the
 waitqueue active check.

I'll send a patch for this in a moment.

 
 -chris
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: fix deadlock in wait_for_more_refs

2012-08-06 Thread Arne Jansen
Commit a168650c introduced a waiting mechanism to prevent busy waiting in
btrfs_run_delayed_refs. This can deadlock with btrfs_run_ordered_operations,
where a tree_mod_seq is held while waiting for the io to complete, while
the end_io calls btrfs_run_delayed_refs.
This whole mechanism is unnecessary. If not enough runnable refs are
available to satisfy count, just return as count is more like a guideline
than a strict requirement.
In case we have to run all refs, commit transaction makes sure that no
other threads are working in the transaction anymore, so we just assert
here that no refs are blocked.

Signed-off-by: Arne Jansen sensi...@gmx.net
---
 fs/btrfs/ctree.c   |6 
 fs/btrfs/ctree.h   |1 -
 fs/btrfs/delayed-ref.c |8 -
 fs/btrfs/disk-io.c |2 -
 fs/btrfs/extent-tree.c |   77 +---
 5 files changed, 21 insertions(+), 73 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 9d7621f..08e0b11 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -421,12 +421,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
spin_unlock(fs_info-tree_mod_seq_lock);
 
/*
-* we removed the lowest blocker from the blocker list, so there may be
-* more processible delayed refs.
-*/
-   wake_up(fs_info-tree_mod_seq_wait);
-
-   /*
 * anything that's lower than the lowest existing (read: blocked)
 * sequence number can be removed from the tree.
 */
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index adb1cd7..2b90942 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1252,7 +1252,6 @@ struct btrfs_fs_info {
atomic_t tree_mod_seq;
struct list_head tree_mod_seq_list;
struct seq_list tree_mod_seq_elem;
-   wait_queue_head_t tree_mod_seq_wait;
 
/* this protects tree_mod_log */
rwlock_t tree_mod_log_lock;
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index da7419e..7561431 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -662,9 +662,6 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info 
*fs_info,
add_delayed_tree_ref(fs_info, trans, ref-node, bytenr,
   num_bytes, parent, ref_root, level, action,
   for_cow);
-   if (!need_ref_seq(for_cow, ref_root) 
-   waitqueue_active(fs_info-tree_mod_seq_wait))
-   wake_up(fs_info-tree_mod_seq_wait);
spin_unlock(delayed_refs-lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, ref-node, extent_op);
@@ -713,9 +710,6 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info 
*fs_info,
add_delayed_data_ref(fs_info, trans, ref-node, bytenr,
   num_bytes, parent, ref_root, owner, offset,
   action, for_cow);
-   if (!need_ref_seq(for_cow, ref_root) 
-   waitqueue_active(fs_info-tree_mod_seq_wait))
-   wake_up(fs_info-tree_mod_seq_wait);
spin_unlock(delayed_refs-lock);
if (need_ref_seq(for_cow, ref_root))
btrfs_qgroup_record_ref(trans, ref-node, extent_op);
@@ -744,8 +738,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info 
*fs_info,
   num_bytes, BTRFS_UPDATE_DELAYED_HEAD,
   extent_op-is_data);
 
-   if (waitqueue_active(fs_info-tree_mod_seq_wait))
-   wake_up(fs_info-tree_mod_seq_wait);
spin_unlock(delayed_refs-lock);
return 0;
 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 502b20c..a7ad8fc 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2035,8 +2035,6 @@ int open_ctree(struct super_block *sb,
fs_info-free_chunk_space = 0;
fs_info-tree_mod_log = RB_ROOT;
 
-   init_waitqueue_head(fs_info-tree_mod_seq_wait);
-
/* readahead state */
INIT_RADIX_TREE(fs_info-reada_tree, GFP_NOFS  ~__GFP_WAIT);
spin_lock_init(fs_info-reada_lock);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e1b153..a9ca92e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2318,12 +2318,6 @@ static noinline int run_clustered_refs(struct 
btrfs_trans_handle *trans,
ref-in_tree = 0;
rb_erase(ref-rb_node, delayed_refs-root);
delayed_refs-num_entries--;
-   /*
-* we modified num_entries, but as we're currently running
-* delayed refs, skip
-* wake_up(delayed_refs-seq_wait);
-* here.
-*/
spin_unlock(delayed_refs-lock);
 
ret = run_one_delayed_ref(trans, root, ref, extent_op,
@@ -2350,22 +2344,6 @@ next:
return count;
 }
 
-static void wait_for_more_refs(struct btrfs_fs_info *fs_info

Re: [PATCH] Btrfs: fix deadlock in wait_for_more_refs

2012-08-06 Thread Arne Jansen
On 08/07/2012 07:03 AM, Mitch Harder wrote:
 On Mon, Aug 6, 2012 at 3:18 PM, Arne Jansen sensi...@gmx.net wrote:
 Commit a168650c introduced a waiting mechanism to prevent busy waiting in
 btrfs_run_delayed_refs. This can deadlock with btrfs_run_ordered_operations,
 where a tree_mod_seq is held while waiting for the io to complete, while
 the end_io calls btrfs_run_delayed_refs.
 This whole mechanism is unnecessary. If not enough runnable refs are
 available to satisfy count, just return as count is more like a guideline
 than a strict requirement.
 In case we have to run all refs, commit transaction makes sure that no
 other threads are working in the transaction anymore, so we just assert
 here that no refs are blocked.

 
 I've been testing this patch after manually merging on top of Josef's
 Btrfs: barrier before waitqueue_active V2 patch.
 
 With that arrangement, I've been unable to reproduce the deadlock on my 
 system.
 
 I'll continue banging away on it tomorrow, and let you know if I
 attain a deadlock.
 
 Also, let me know if you need me to test without including Josef's
 added barriers.

The problem at hand has nothing to do with barriers, so Josef's
patch shouldn't be necessary for this particular problem. Whether it
fixes other ones I can't tell.

 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: increase the size of the free space cache

2012-08-06 Thread Arne Jansen
On 08/06/2012 09:46 PM, Josef Bacik wrote:
 Arne was complaining about the space cache having mismatching generation
 numbers when debugging a deadlock.  This is because we can run out of space
 in our preallocated range for our space cache if you have a pretty
 fragmented amount of space in your pinned space.  So just increase the
 amount of space we preallocate for space cache so we can be sure to have
 enough space.  This will only really affect data ranges since they're the only
 chunks that end up larger than 256MB.  Thanks,
 
 Signed-off-by: Josef Bacik jba...@fusionio.com

Arne does not complain anymore.

Tested-by: Arne Jansen sensi...@gmx.net

 ---
  fs/btrfs/extent-tree.c |   15 +++
  1 files changed, 7 insertions(+), 8 deletions(-)
 
 diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
 index 45c69c4..55d33b8 100644
 --- a/fs/btrfs/extent-tree.c
 +++ b/fs/btrfs/extent-tree.c
 @@ -3007,17 +3007,16 @@ again:
   }
   spin_unlock(block_group-lock);
  
 - num_pages = (int)div64_u64(block_group-key.offset, 1024 * 1024 * 1024);
 + /*
 +  * Try to preallocate enough space based on how big the block group is.
 +  * Keep in mind this has to include any pinned space which could end up
 +  * taking up quite a bit since it's not folded into the other space
 +  * cache.
 +  */
 + num_pages = (int)div64_u64(block_group-key.offset, 256 * 1024 * 1024);
   if (!num_pages)
   num_pages = 1;
  
 - /*
 -  * Just to make absolutely sure we have enough space, we're going to
 -  * preallocate 12 pages worth of space for each block group.  In
 -  * practice we ought to use at most 8, but we need extra space so we can
 -  * add our header and have a terminator between the extents and the
 -  * bitmaps.
 -  */
   num_pages *= 16;
   num_pages *= PAGE_CACHE_SIZE;
  
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: barrier before waitqueue_active

2012-08-05 Thread Arne Jansen
On 08/03/2012 04:43 PM, Mitch Harder wrote:
 On Wed, Aug 1, 2012 at 7:21 PM, Mitch Harder
 mitch.har...@sabayonlinux.org wrote:
 On Wed, Aug 1, 2012 at 3:25 PM, Josef Bacik jba...@fusionio.com wrote:
 We need an smp_mb() before waitqueue_active to avoid missing wakeups.
 Before Mitch was hitting a deadlock between the ordered flushers and the
 transaction commit because the ordered flushers were waiting for more refs
 and were never woken up, so those smp_mb()'s are the most important.
 Everything else I added for correctness sake and to avoid getting bitten by
 this again somewhere else.  Thanks,


 This patch seems to make it tougher to hit a deadlock, but I'm still
 encountering intermittent deadlocks using this patch when running
 multiple rsync threads.

 I've also tested Patch 2, and that has me hitting a deadlock even
 quicker (when starting several copying threads).

 I also found a slight performance hit using this patch.  On a 3.4.6
 kernel (merged with the 3.5_rc for-linus branch), I would typically
 complete my rsync test in ~265 seconds.  Also, I can't recall hitting
 a deadlock on the 3.4.6 kernel (with 3.5_rc for-linus).  When using
 this patch, the test would take ~310 seconds (when it didn't hit a
 deadlock).

 
 I've bisected my deadlock back to:
 Btrfs: hooks for qgroup to record delayed refs (commit 546adb0d).
 

I've got it reproduced here and, I think, nailed it down. I'll send a
patch tomorrow after discussing it with Jan.

-Arne

 This issue may be the same problem Alexander Block is discussing in
 another thread on the Btrfs Mailing List:
 http://article.gmane.org/gmane.comp.file-systems.btrfs/19028
 
 I'm using multiple rsync threads instead of the new send/receive
 function.  But we're both hitting deadlocks that bisect back to the
 same commit.
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] Btrfs: remove superblock writing after fatal error

2012-08-02 Thread Arne Jansen
On 02.08.2012 12:36, Liu Bo wrote:
 On 08/02/2012 06:30 PM, Stefan Behrens wrote:
 On Wed, 01 Aug 2012 16:31:54 +0200, Stefan Behrens wrote:
 On Wed, 01 Aug 2012 21:31:58 +0800, Liu Bo wrote:
 On 08/01/2012 09:07 PM, Jan Schmidt wrote:
 On Wed, August 01, 2012 at 14:02 (+0200), Liu Bo wrote:
 On 08/01/2012 07:45 PM, Stefan Behrens wrote:
 With commit acce952b0, btrfs was changed to flag the filesystem with
 BTRFS_SUPER_FLAG_ERROR and switch to read-only mode after a fatal
 error happened, such as write I/O errors on all mirrors.
 In such situations, on unmount, the superblock is written in
 btrfs_error_commit_super(). This is done with the intention to be able
 to evaluate the error flag on the next mount. A warning is printed
 in this case during the next mount and the log tree is ignored.

 The issue is that it is possible that the superblock points to a root
 that was not written (due to write I/O errors).
 The result is that the filesystem cannot be mounted. btrfsck also does
 not start and all the other btrfs-progs tools fail to start as well.
 However, mount -o recovery is working well and does the right things
 to recover the filesystem (i.e., don't use the log root, clear the
 free space cache and use the next mountable root that is stored in the
 root backup array).

 This patch removes the writing of the superblock when
 BTRFS_SUPER_FLAG_ERROR is set, and removes the handling of the error
 flag in the mount function.


 Yes, I have to admit that this can be a serious problem.

 But we'll need to send the error flag stored in the super block into
 disk in the future so that the next mount can find it unstable and do
 fsck by itself maybe.

 Hum, that's possible. However, I neither see

 a) a safe way to get that flag to disk

 nor

 b) a situation where this flag would help. When we abort a transaction, 
 we just
 roll everything back to the last commit, i.e. a consistent state. So if 
 we stop
 writing a potentially corrupt super block, we should be fine anyway. Or 
 am I
 missing something?


 I'm just wondering if we can roll everything back well, why do we need 
 fsck?

 If the disks support barriers, we roll everything back very well. The
 most recent superblock on the disks always defines a consistent
 filesystem state. There are only two remaining filesystem consistency
 issues left that can cause inconsistent states, one is the one that the
 patch in this email addresses, and the second one is that the error
 result from barrier_all_devices() is ignored (which I want to change next).

 Hi Liu Bo,

 Do you have any remaining objections to that patch?

 
 Hi Stefan,
 
 Still I have another question:
 
 Our metadata can be flushed into disk if we reach the limit, 32k, so we
 can end up with updated metadata and the latest superblock if we do not
 write the current super block.

The old metadata stays valid until the new superblock is written,
so no problem here, or maybe I don't understand your question :)

 
 Any ideas?
 
 thanks,
 liubo
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] Btrfs: remove superblock writing after fatal error

2012-08-02 Thread Arne Jansen
On 02.08.2012 13:34, Liu Bo wrote:
 On 08/02/2012 07:18 PM, Arne Jansen wrote:
 On 02.08.2012 12:36, Liu Bo wrote:
 On 08/02/2012 06:30 PM, Stefan Behrens wrote:
 On Wed, 01 Aug 2012 16:31:54 +0200, Stefan Behrens wrote:
 On Wed, 01 Aug 2012 21:31:58 +0800, Liu Bo wrote:
 On 08/01/2012 09:07 PM, Jan Schmidt wrote:
 On Wed, August 01, 2012 at 14:02 (+0200), Liu Bo wrote:
 On 08/01/2012 07:45 PM, Stefan Behrens wrote:
 With commit acce952b0, btrfs was changed to flag the filesystem with
 BTRFS_SUPER_FLAG_ERROR and switch to read-only mode after a fatal
 error happened, such as write I/O errors on all mirrors.
 In such situations, on unmount, the superblock is written in
 btrfs_error_commit_super(). This is done with the intention to be able
 to evaluate the error flag on the next mount. A warning is printed
 in this case during the next mount and the log tree is ignored.

 The issue is that it is possible that the superblock points to a root
 that was not written (due to write I/O errors).
 The result is that the filesystem cannot be mounted. btrfsck also does
 not start and all the other btrfs-progs tools fail to start as well.
 However, mount -o recovery is working well and does the right things
 to recover the filesystem (i.e., don't use the log root, clear the
 free space cache and use the next mountable root that is stored in the
 root backup array).

 This patch removes the writing of the superblock when
 BTRFS_SUPER_FLAG_ERROR is set, and removes the handling of the error
 flag in the mount function.


 Yes, I have to admit that this can be a serious problem.

 But we'll need to send the error flag stored in the super block into
 disk in the future so that the next mount can find it unstable and do
 fsck by itself maybe.

 Hum, that's possible. However, I neither see

 a) a safe way to get that flag to disk

 nor

 b) a situation where this flag would help. When we abort a transaction, 
 we just
 roll everything back to the last commit, i.e. a consistent state. So if 
 we stop
 writing a potentially corrupt super block, we should be fine anyway. Or 
 am I
 missing something?


 I'm just wondering if we can roll everything back well, why do we need 
 fsck?

 If the disks support barriers, we roll everything back very well. The
 most recent superblock on the disks always defines a consistent
 filesystem state. There are only two remaining filesystem consistency
 issues left that can cause inconsistent states, one is the one that the
 patch in this email addresses, and the second one is that the error
 result from barrier_all_devices() is ignored (which I want to change 
 next).

 Hi Liu Bo,

 Do you have any remaining objections to that patch?


 Hi Stefan,

 Still I have another question:

 Our metadata can be flushed into disk if we reach the limit, 32k, so we
 can end up with updated metadata and the latest superblock if we do not
 write the current super block.

 The old metadata stays valid until the new superblock is written,
 so no problem here, or maybe I don't understand your question :)

 
 Yeah, Arne, you're right :)
 
 But for undetected and unexpected errors, as Arne mentioned, I want
 to keep the error flag, which can at least inform users that running
 fsck on this FS is recommended (but not required).

How about storing the flag in a different location than the superblock?
If the fs is in an unknown state, every write potentially makes it only
worse.

 
 thanks,
 liubo
 

 Any ideas?

 thanks,
 liubo
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

 
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] Btrfs: remove superblock writing after fatal error

2012-08-02 Thread Arne Jansen
On 02.08.2012 15:57, David Sterba wrote:
 On Thu, Aug 02, 2012 at 03:46:50PM +0200, Arne Jansen wrote:
 Anyway, for now, our error flag has only been stored in memory, so what
 about just keeping it until we find a graceful way?

 Yeah, we need this patch to restore consistency. We can define a fixed
 area on disk (e.g. behind the superblock) where we can write the flag
 to without risking the superblock.
 
 Is it possible that sectors around the superblock are somehow damaged
 and unwritable that writing to them will fail as well? Ie. some safe
 distance from the superblock would be needed.

In about 1MB distance you're side-by-side with the superblock, so closer
to it might even be safer ;)

 
 david

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] Btrfs: remove superblock writing after fatal error

2012-08-01 Thread Arne Jansen
On 01.08.2012 15:31, Liu Bo wrote:
 On 08/01/2012 09:07 PM, Jan Schmidt wrote:
 On Wed, August 01, 2012 at 14:02 (+0200), Liu Bo wrote:
 On 08/01/2012 07:45 PM, Stefan Behrens wrote:
 With commit acce952b0, btrfs was changed to flag the filesystem with
 BTRFS_SUPER_FLAG_ERROR and switch to read-only mode after a fatal
 error happened, such as write I/O errors on all mirrors.
 In such situations, on unmount, the superblock is written in
 btrfs_error_commit_super(). This is done with the intention to be able
 to evaluate the error flag on the next mount. A warning is printed
 in this case during the next mount and the log tree is ignored.

 The issue is that it is possible that the superblock points to a root
 that was not written (due to write I/O errors).
 The result is that the filesystem cannot be mounted. btrfsck also does
 not start and all the other btrfs-progs tools fail to start as well.
 However, mount -o recovery is working well and does the right things
 to recover the filesystem (i.e., don't use the log root, clear the
 free space cache and use the next mountable root that is stored in the
 root backup array).

 This patch removes the writing of the superblock when
 BTRFS_SUPER_FLAG_ERROR is set, and removes the handling of the error
 flag in the mount function.


 Yes, I have to admit that this can be a serious problem.

 But we'll need to send the error flag stored in the super block into
 disk in the future so that the next mount can find it unstable and do
 fsck by itself maybe.

 Hum, that's possible. However, I neither see

 a) a safe way to get that flag to disk

 nor

 b) a situation where this flag would help. When we abort a transaction, we 
 just
 roll everything back to the last commit, i.e. a consistent state. So if we 
 stop
 writing a potentially corrupt super block, we should be fine anyway. Or am I
 missing something?

 
 I'm just wondering if we can roll everything back well, why do we need fsck?

Mostly for undetected errors.

 
 thanks,
 liubo
 
 -Jan

 
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 4/7] Btrfs: introduce subvol uuids and times

2012-07-23 Thread Arne Jansen
On 23.07.2012 21:41, Alexander Block wrote:
 On Mon, Jul 16, 2012 at 4:56 PM, Arne Jansen sensi...@gmx.net wrote:
 On 04.07.2012 15:38, Alexander Block wrote:
 +
 + ret = btrfs_update_root(trans, root-fs_info-tree_root,
 + root-root_key, root-root_item);
 + if (ret  0) {
 + goto out;

 are you leaking a trans handle here?

 btrfs_update_root is aborting the transaction in case of failure. Do I
 still need to call end_transaction?

It's your handle, you should free it.

...


 +struct btrfs_ioctl_received_subvol_args {
 + charuuid[BTRFS_UUID_SIZE];  /* in */
 + __u64   stransid;   /* in */
 + __u64   rtransid;   /* out */
 + struct timespec stime;  /* in */
 + struct timespec rtime;  /* out */
 + __u64   reserved[16];

 What is this reserved used for? I don't see a mechanism that could be
 used to signal that there are useful information here, other than
 using a different ioctl.

 The reserved is a result of a suggestion made by David. I can remove
 it again if you want...

I'm not arguing against some reserved space; I just have trouble
seeing how you can make use of it in the future when there's no way
to signal that it contains valid information. It should be sufficient
to define the reserved values to be 0 at the moment.

 +};
 +
  #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
  struct btrfs_ioctl_vol_args)
  #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
 @@ -359,6 +368,10 @@ struct btrfs_ioctl_get_dev_stats {
   struct btrfs_ioctl_ino_path_args)
  #define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
   struct btrfs_ioctl_ino_path_args)
 +
 +#define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
 + struct btrfs_ioctl_received_subvol_args)
 +
  #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
 struct btrfs_ioctl_get_dev_stats)
  #define BTRFS_IOC_GET_AND_RESET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 53, \
 diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
 index 24fb8ce..17d638e 100644
 --- a/fs/btrfs/root-tree.c
 +++ b/fs/btrfs/root-tree.c
 @@ -16,6 +16,7 @@
   * Boston, MA 021110-1307, USA.
   */

 +#include linux/uuid.h
  #include ctree.h
  #include transaction.h
  #include disk-io.h
 @@ -25,6 +26,9 @@
   * lookup the root with the highest offset for a given objectid.  The key 
 we do
   * find is copied into 'key'.  If we find something return 0, otherwise 1, 
  0
   * on error.
 + * We also check if the root was once mounted with an older kernel. If we 
 detect
 + * this, the new fields coming after 'level' get overwritten with zeros so 
 to
 + * invalidate the fields.

 ... This is detected by a mismatch of the 2 generation fields ... or 
 something
 like that.

 The current version (found in git only) has this new function which is
 called in find_last_root:
 void btrfs_read_root_item(struct btrfs_root *root,
struct extent_buffer *eb, int slot,
struct btrfs_root_item *item)
 
 The comment above this function explains what happens.

ok. Please regard most of my comments as an expression of my thoughts while
reading it. So they mark places where it might be useful to add comments
to make it easier for the next reader :)
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 6/7] Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive (part 1)

2012-07-21 Thread Arne Jansen
On 07/04/2012 03:38 PM, Alexander Block wrote:
 This patch introduces the BTRFS_IOC_SEND ioctl that is
 required for send. It allows btrfs-progs to implement
 full and incremental sends. Patches for btrfs-progs will
 follow.
 
 I had to split the patch as it got larger than 100k which is
 the limit for the mailing list. The first part only contains
 the send.h header and the helper functions for TLV handling
 and long path name handling and some other helpers. The second
 part contains the actual send logic from send.c
 
 Signed-off-by: Alexander Block abloc...@googlemail.com
 ---
[snip]
 +
 +struct name_cache_entry {
 + struct list_head list;
 + struct list_head use_list;

unused.

 + u64 ino;
 + u64 gen;
 + u64 parent_ino;
 + u64 parent_gen;
 + int ret;
 + int need_later_update;
 + int name_len;
 + char name[];
 +};
 +
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 6/7] Btrfs: introduce BTRFS_IOC_SEND for btrfs send/receive (part 1)

2012-07-18 Thread Arne Jansen
On 04.07.2012 15:38, Alexander Block wrote:
 This patch introduces the BTRFS_IOC_SEND ioctl that is
 required for send. It allows btrfs-progs to implement
 full and incremental sends. Patches for btrfs-progs will
 follow.
 
 I had to split the patch as it got larger than 100k which is
 the limit for the mailing list. The first part only contains
 the send.h header and the helper functions for TLV handling
 and long path name handling and some other helpers. The second
 part contains the actual send logic from send.c
 
 Signed-off-by: Alexander Block abloc...@googlemail.com
 ---
  fs/btrfs/Makefile |2 +-
  fs/btrfs/ioctl.h  |   10 +
  fs/btrfs/send.c   | 1009 
 +
  fs/btrfs/send.h   |  126 +++
  4 files changed, 1146 insertions(+), 1 deletion(-)
  create mode 100644 fs/btrfs/send.c
  create mode 100644 fs/btrfs/send.h
 
 diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
 index 0c4fa2b..f740644 100644
 --- a/fs/btrfs/Makefile
 +++ b/fs/btrfs/Makefile
 @@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o 
 root-tree.o dir-item.o \
  extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
  export.o tree-log.o free-space-cache.o zlib.o lzo.o \
  compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
 -reada.o backref.o ulist.o
 +reada.o backref.o ulist.o send.o
  
  btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
  btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
 diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
 index c9e3fac..282bc64 100644
 --- a/fs/btrfs/ioctl.h
 +++ b/fs/btrfs/ioctl.h
 @@ -304,6 +304,15 @@ struct btrfs_ioctl_received_subvol_args {
   __u64   reserved[16];
  };
  
 +struct btrfs_ioctl_send_args {
 + __s64 send_fd;  /* in */
 + __u64 clone_sources_count;  /* in */
 + __u64 __user *clone_sources;/* in */
 + __u64 parent_root;  /* in */
 + __u64 flags;/* in */
 + __u64 reserved[4];  /* in */
 +};
 +
  #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
  struct btrfs_ioctl_vol_args)
  #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
 @@ -371,6 +380,7 @@ struct btrfs_ioctl_received_subvol_args {
  
  #define BTRFS_IOC_SET_RECEIVED_SUBVOL _IOWR(BTRFS_IOCTL_MAGIC, 37, \
   struct btrfs_ioctl_received_subvol_args)
 +#define BTRFS_IOC_SEND _IOW(BTRFS_IOCTL_MAGIC, 38, struct 
 btrfs_ioctl_send_args)
  
  #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
 struct btrfs_ioctl_get_dev_stats)
 diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
 new file mode 100644
 index 000..47a2557
 --- /dev/null
 +++ b/fs/btrfs/send.c
 @@ -0,0 +1,1009 @@
 +/*
 + * Copyright (C) 2012 Alexander Block.  All rights reserved.
 + *
 + * This program is free software; you can redistribute it and/or
 + * modify it under the terms of the GNU General Public
 + * License v2 as published by the Free Software Foundation.
 + *
 + * This program is distributed in the hope that it will be useful,
 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 + * General Public License for more details.
 + *
 + * You should have received a copy of the GNU General Public
 + * License along with this program; if not, write to the
 + * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 + * Boston, MA 021110-1307, USA.
 + */
 +
 +#include linux/bsearch.h
 +#include linux/fs.h
 +#include linux/file.h
 +#include linux/sort.h
 +#include linux/mount.h
 +#include linux/xattr.h
 +#include linux/posix_acl_xattr.h
 +#include linux/radix-tree.h
 +#include linux/crc32c.h
 +
 +#include send.h
 +#include backref.h
 +#include locking.h
 +#include disk-io.h
 +#include btrfs_inode.h
 +#include transaction.h
 +
 +static int g_verbose = 0;
 +
 +#define verbose_printk(...) if (g_verbose) printk(__VA_ARGS__)

Maybe pr_debug is interesting to you.

 +
 +/*
 + * A fs_path is a helper to dynamically build path names with unknown size.
 + * It reallocates the internal buffer on demand.
 + * It allows fast adding of path elements on the right side (normal path) and
 + * fast adding to the left side (reversed path). A reversed path can also be
 + * unreversed if needed.
 + */
 +struct fs_path {
 + union {
 + struct {
 + char *start;
 + char *end;
 + char *prepared;
 +
 + char *buf;
 + int buf_len;
 + int reversed:1;
 + int virtual_mem:1;

s/int/unsigned int/

 + char inline_buf[];
 + };
 + char pad[PAGE_SIZE];
 + };
 +};
 +#define FS_PATH_INLINE_SIZE \
 + (sizeof(struct fs_path) - offsetof(struct fs_path, 

Re: [PATCH v1 00/15] Btrfs: subvolume quota groups (qgroups)

2012-07-17 Thread Arne Jansen
On 07/17/2012 08:33 PM, Alex Lyakas wrote:
 Jan,
 I have studied to some extent the PDF and the code. I have some
 questions; perhaps you can address them?
 
 1) btrfs_qgroup_account_ref() calling btrfs_find_all_roots():
 I understand that bytenr indicates some EXTENT_ITEM, which is a
 back-reference for extent, which is perhaps a tree block (leaf or

Not a backreference. The EXTENT_ITEM entry is the allocation of the
extent. It also contains the back references.

 node) or EXTENT_DATA. I also understand, that we want to receive a
 list of subvolume roots, that reference this extent at some point in
 time in the middle of a transaction. However, there is mention of
 finding all extents that reference this extent, which is something
 basic I don't understand. How can an extent back-reference another

Here, the forward reference is meant. Tree nodes and leaves are
referenced by tree nodes, data extents are referenced by leaves.

 extent? Also, how do we encounter roots (which is what we need in the
 output) during this walking? Hope you can shed some light, or you can

iirc the root backreferences itself, which is the criterion that we
found a root.

 let me continue digging in the code:)
 
 2) btrfs_qgroup_account_ref() step 3:
 I understand that at this step, we look at all roots that we cannot
 reach from the new root (the one to/from which the ref is
 added/removed). And we check the refcnt before/after addition/deletion
 (respectively). Then we check that its refcnt before/after
 addition/deletion equals to the number of reachable roots before/after
 addition/deletion. I still don't understand fully why if these two
 values are equal, we can update exclusive count?

I would have to re-read the pdf, it's been a long time ;)

 My partial understanding is that such root, let's say before addition,
 was exclusive owner of the extent. And now (since this root is not
 reachable from new root), we are adding the extent to some disjoint
 qgroup, so the previous root is not exclusive owner anymore. Is this
 correct direction?

I think so. Drawing trees helps a lot here. Don't give up too easily,
it took us 2 weeks to work out the algorithm ;)

 
 3) The paper mentions tracking groups to account for
 referenced/exclusive properly during snapshot creation. Looking at
 btrfs-progs, I see that currently the user is expected to correctly
 indicate which values should be copied from where, and kernel (more or
 less) blindly copies those values. Is this correct?

Yes. It might be useful to create a description language for what you are
going to snapshot from where and let progs take care that all tracking
groups are set up properly. But that is an area for further research,
currently it has to be done by hand.

 
 4) GROUP_RELATION items:
 We have two such items for every relation. How do we know which one is
 the child and which is the parent? It looks from the kernel code that
 it is expected that child-qgroupid  parent-qgroupid. Is this correct?
 If yes, who is enforcing this?

The qgroupid contains the level, and the parent always has to have a
level greater than that of the child. I think that is checked somewhere.
As the level is encoded into the upper bits, the above relation holds.

-Arne

 
 Thanks for your help,
 Alex.
 
 On Thu, Jul 12, 2012 at 12:43 PM, Jan Schmidt list.bt...@jan-o-sch.net 
 wrote:
 This is a new version of Arne's qgroup patches from last October. The
 old patches didn't get the backref walking right, which is now based on
 the tree modification log.

 You can limit the space available to subvolumes or any group of
 subvolumes. You can determine the amount of space that will get freed
 when deleting a snapshot.

 The initial scan is still missing, so expect negative counters when you
 enable quotas on a non-empty volume and then delete stuff.

 Arne's introduction and concept description can still be found at

 http://sensille.com/qgroups.pdf

 You can pull these patches from my git repository

 git://git.jan-o-sch.net/btrfs-unstable qgroup

 The user mode patches required were sent at October 11, 2011 by Arne,
 subject [PATCH v0] btrfs-progs: add qgroup commands.

 I tried to include some fair benchmark results with this cover letter.
 However, I tried several disk benchmarks from the phoronix test suite,
 none of them resulted in any write throughput decrease. I will have to
 create a more realistic setup on my own to benchmark the impact of
 qgroups (suggestions welcome). For now, I just wanted to get that patch
 set out :-)

 Thanks,
 -Jan

 Arne Jansen (11):
   Btrfs: qgroup on-disk format
   Btrfs: add helper for tree enumeration
   Btrfs: check the root passed to btrfs_end_transaction
   Btrfs: added helper to create new trees
   Btrfs: qgroup state and initialization
   Btrfs: Test code to change the order of delayed-ref processing
   Btrfs: qgroup implementation and prototypes
   Btrfs: quota tree support and startup
   Btrfs: hooks to reserve qgroup space
   Btrfs

Re: [PATCH] Btrfs: allow delayed refs to be merged V2

2012-07-16 Thread Arne Jansen
On 14.07.2012 15:09, Josef Bacik wrote:
 Daniel Blueman reported a bug with fio+balance on a ramdisk setup.
 Basically what happens is the balance relocates a tree block which will drop
 the implicit refs for all of its children and adds a full backref.  Once the
 block is relocated we have to add the implicit refs back, so when we cow the
 block again we add the implicit refs for its children back.  The problem
 comes when the original drop ref doesn't get run before we add the implicit
 refs back.  The delayed ref stuff will specifically prefer ADD operations
 over DROP to keep us from freeing up an extent that will have references to
 it, so we try to add the implicit ref before it is actually removed and we
 panic.  This worked fine before because the add would have just canceled the
 drop out and we would have been fine.  But the backref walking work needs to
 be able to freeze the delayed ref stuff in time so we have this ever
 increasing sequence number that gets attached to all new delayed ref updates
 which makes us not merge refs and we run into this issue.
 
 So to fix this we need to merge delayed refs.  So every time we run a
 clustered ref we need to try and merge all of its delayed refs.  The backref
 walking stuff locks the delayed ref head before processing, so if we have it
 locked we are safe to merge any refs inside of the sequence number.  If
 there is no sequence number we can merge all refs.  Doing this not only
 fixes our bug but keeps the delayed ref code from adding and removing
 useless refs and batching together multiple refs into one search instead of
 one search per delayed ref, which will really help our commit times.  I ran
 this with Daniel's test and 276 and I haven't seen any problems.  Thanks,
 
 Reported-by: Daniel J Blueman dan...@quora.org
 Signed-off-by: Josef Bacik jba...@fusionio.com
 ---
 V1-V2: 
 -Merge all extents, don't do this weird sequence check at the front, just do 
 it
 all when we run the delayed refs.
 -Merge like actions so we can get the performance boost of multiple ref mods 
 at
 the same time

This one looks better. It gives back the merging capability without too much
fiddling with sequences. Although I still think something else is fishy when
merges are needed for correct functionality, this patch seems to fix it while
having additional benefits. Thanks for working on it :)

Acked-by: Arne Jansen sensi...@gmx.net

  fs/btrfs/delayed-ref.c |  152 
 +++-
  fs/btrfs/delayed-ref.h |3 +
  fs/btrfs/extent-tree.c |9 +++
  3 files changed, 137 insertions(+), 27 deletions(-)
 
 diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
 index 13ae7b0..324ccec 100644
 --- a/fs/btrfs/delayed-ref.c
 +++ b/fs/btrfs/delayed-ref.c
 @@ -38,17 +38,14 @@
  static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
 struct btrfs_delayed_tree_ref *ref1)
  {
 - if (ref1-node.type == BTRFS_TREE_BLOCK_REF_KEY) {
 - if (ref1-root  ref2-root)
 - return -1;
 - if (ref1-root  ref2-root)
 - return 1;
 - } else {
 - if (ref1-parent  ref2-parent)
 - return -1;
 - if (ref1-parent  ref2-parent)
 - return 1;
 - }
 + if (ref1-root  ref2-root)
 + return -1;
 + if (ref1-root  ref2-root)
 + return 1;
 + if (ref1-parent  ref2-parent)
 + return -1;
 + if (ref1-parent  ref2-parent)
 + return 1;
   return 0;
  }
  
 @@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref 
 *ref2,
   * type of the delayed backrefs and content of delayed backrefs.
   */
  static int comp_entry(struct btrfs_delayed_ref_node *ref2,
 -   struct btrfs_delayed_ref_node *ref1)
 +   struct btrfs_delayed_ref_node *ref1,
 +   bool compare_seq)
  {
   if (ref1-bytenr  ref2-bytenr)
   return -1;
 @@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node 
 *ref2,
   if (ref1-type  ref2-type)
   return 1;
   /* merging of sequenced refs is not allowed */
 - if (ref1-seq  ref2-seq)
 - return -1;
 - if (ref1-seq  ref2-seq)
 - return 1;
 + if (compare_seq) {
 + if (ref1-seq  ref2-seq)
 + return -1;
 + if (ref1-seq  ref2-seq)
 + return 1;
 + }
   if (ref1-type == BTRFS_TREE_BLOCK_REF_KEY ||
   ref1-type == BTRFS_SHARED_BLOCK_REF_KEY) {
   return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
 @@ -139,7 +139,7 @@ static struct btrfs_delayed_ref_node *tree_insert(struct 
 rb_root *root,
   entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
rb_node);
  
 - cmp = comp_entry(entry, ins);
 + cmp = comp_entry(entry, ins

Re: btrfs GPF in read_extent_buffer() while scrubbing with kernel 3.4.2

2012-07-16 Thread Arne Jansen
Any news on this? If you give me some hints, I can try to reproduce
it here.

-Arne

On 10.07.2012 08:57, Arne Jansen wrote:
 On 10.07.2012 06:16, Sami Liedes wrote:
 On Mon, Jul 09, 2012 at 11:05:47AM +0200, Arne Jansen wrote:
 * Just before the crash:
   btrfs: invalid parameters for read_extent_buffer: start (32771)  
 eb-len (32768). eb start is 2261163409408, level 100, generation 
 4412718571037421157, nritems 538968254. len param 17. debug 
 2/989/538968254/4412718571037421157/0x0/0/0x0/0x0


 At first glance: the generation converted to ASCII is: ent() ==,
 so someone is patching the memory with ASCII text, possibly C source.
 It might be interesting to dump the full contents of the eb, to get
 a clue on the source of the data.

 I changed the code to dump the contents of the eb struct at the point
 where that error (btrfs: invalid parameters...) is printed, at the
 "checksum mismatch 4" site and at the "node seems invalid now" site.
 Now I have a big log of 1795 corrupted ebs. So far nothing that looks
 remotely like ascii text, though. But I have two different versions of
 the eb that caused that warning, a less corrupted one and a more
 corrupted one:

 
 btrfs: --- start eb contents at 8801b13cc4c8 ---
 btrfs: 8801b13cc4c8: 00 80 e4 66 09 02 00 00 00 80 00 00 00 00 00 00  
 ...f
 btrfs: 8801b13cc4d8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc4e8: 20 00 00 00 00 00 00 00 30 00 d7 10 02 88 ff ff   
 ...0...
 btrfs: 8801b13cc4f8: 02 02 00 00 03 00 00 00 06 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc508: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc518: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc528: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc538: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc548: 00 00 10 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc558: 58 c5 3c b1 01 88 ff ff 58 c5 3c b1 01 88 ff ff  
 X..X..
 btrfs: 8801b13cc568: 00 00 00 00 00 00 00 00 70 c5 3c b1 01 88 ff ff  
 p..
 btrfs: 8801b13cc578: 70 c5 3c b1 01 88 ff ff 00 00 00 00 00 00 00 00  
 p..
 btrfs: 8801b13cc588: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc598: 80 5f 9a 06 00 ea ff ff 00 86 9b 06 00 ea ff ff  
 ._..
 btrfs: 8801b13cc5a8: 40 4c 9a 06 00 ea ff ff 80 66 9a 06 00 ea ff ff  
 @L...f..
 btrfs: 8801b13cc5b8: 80 eb 9b 06 00 ea ff ff 40 05 a2 06 00 ea ff ff  
 @...
 btrfs: 8801b13cc5c8: 40 e1 9b 06 00 ea ff ff 80 c4 9c 06 00 ea ff ff  
 @...
 btrfs: 8801b13cc5d8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc5e8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc5f8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc608: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc618: 98 c5 3c b1 01 88 ff ff 00 00 00 00 00 00 00 00  
 ...
 btrfs: 8801b13cc628: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc638: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc648: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc658: 00 00 00 00 00 00 00 00  
 
 
 this one looks good so far.
 
 btrfs: --- end eb contents at 8801b13cc4c8 ---
 btrfs: dm-6 checksum verify failed on 2239404212224 wanted B5F632BC found 
 3579FB59 level 160
 btrfs: node seems invalid now. checksum ok = 1
 btrfs: --- start eb contents at 8801b13cc4c8 ---
 [... identical dump to above ...]
 btrfs: --- end eb contents at 8801b13cc4c8 ---
 btrfs: invalid parameters for read_extent_buffer: start (32771)  eb-len 
 (32768). eb start is 2239404212224, level 160, generation 
 4716553384049587249, nritems 295705211. len param 17. debug 
 2/989/295705211/4716553384049587249/0x0/0/0x0/0x0
 btrfs: --- start eb contents at 8801b13cc4c8 ---
 btrfs: 8801b13cc4c8: 00 80 e4 66 09 02 00 00 00 80 00 00 00 00 00 00  
 ...f
 btrfs: 8801b13cc4d8: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc4e8: 20 00 00 00 00 00 00 00 30 00 d7 10 02 88 ff ff   
 ...0...
 btrfs: 8801b13cc4f8: 02 02 00 00 03 00 00 00 06 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc508: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc518: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  
 
 btrfs: 8801b13cc528: 00 00 00 00

Re: [PATCH] Btrfs: allow delayed refs to be merged V2

2012-07-16 Thread Arne Jansen
One point regarding the merge: wouldn't it be better to put the seq as a sort
criterion at the end, so the merge can happen in one run through the list
instead of this potentially quadratic time? I've seen some "CPU stuck for 22s"
warnings, from which the system recovered after the test.


On 16.07.2012 09:41, Arne Jansen wrote:
 On 14.07.2012 15:09, Josef Bacik wrote:
 Daniel Blueman reported a bug with fio+balance on a ramdisk setup.
 Basically what happens is the balance relocates a tree block which will drop
 the implicit refs for all of its children and adds a full backref.  Once the
 block is relocated we have to add the implicit refs back, so when we cow the
 block again we add the implicit refs for its children back.  The problem
 comes when the original drop ref doesn't get run before we add the implicit
 refs back.  The delayed ref stuff will specifically prefer ADD operations
 over DROP to keep us from freeing up an extent that will have references to
 it, so we try to add the implicit ref before it is actually removed and we
 panic.  This worked fine before because the add would have just canceled the
 drop out and we would have been fine.  But the backref walking work needs to
 be able to freeze the delayed ref stuff in time so we have this ever
 increasing sequence number that gets attached to all new delayed ref updates
 which makes us not merge refs and we run into this issue.

 So to fix this we need to merge delayed refs.  So every time we run a
 clustered ref we need to try and merge all of its delayed refs.  The backref
 walking stuff locks the delayed ref head before processing, so if we have it
 locked we are safe to merge any refs inside of the sequence number.  If
 there is no sequence number we can merge all refs.  Doing this not only
 fixes our bug but keeps the delayed ref code from adding and removing
 useless refs and batching together multiple refs into one search instead of
 one search per delayed ref, which will really help our commit times.  I ran
 this with Daniel's test and 276 and I haven't seen any problems.  Thanks,

 Reported-by: Daniel J Blueman dan...@quora.org
 Signed-off-by: Josef Bacik jba...@fusionio.com
 ---
 V1-V2: 
 -Merge all extents, don't do this weird sequence check at the front, just do 
 it
 all when we run the delayed refs.
 -Merge like actions so we can get the performance boost of multiple ref mods 
 at
 the same time
 
 This one looks better. It gives back the merging capability without too much
 fiddling with sequences. Although I still think something else is fishy when
 merges are needed for correct functionality, this patch seems to fix it while
 having additional benefits. Thanks for working on it :)
 
 Acked-by: Arne Jansen sensi...@gmx.net
 
  fs/btrfs/delayed-ref.c |  152 
 +++-
  fs/btrfs/delayed-ref.h |3 +
  fs/btrfs/extent-tree.c |9 +++
  3 files changed, 137 insertions(+), 27 deletions(-)

 diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
 index 13ae7b0..324ccec 100644
 --- a/fs/btrfs/delayed-ref.c
 +++ b/fs/btrfs/delayed-ref.c
 @@ -38,17 +38,14 @@
  static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
struct btrfs_delayed_tree_ref *ref1)
  {
 -if (ref1-node.type == BTRFS_TREE_BLOCK_REF_KEY) {
 -if (ref1-root  ref2-root)
 -return -1;
 -if (ref1-root  ref2-root)
 -return 1;
 -} else {
 -if (ref1-parent  ref2-parent)
 -return -1;
 -if (ref1-parent  ref2-parent)
 -return 1;
 -}
 +if (ref1-root  ref2-root)
 +return -1;
 +if (ref1-root  ref2-root)
 +return 1;
 +if (ref1-parent  ref2-parent)
 +return -1;
 +if (ref1-parent  ref2-parent)
 +return 1;
  return 0;
  }
  
 @@ -85,7 +82,8 @@ static int comp_data_refs(struct btrfs_delayed_data_ref 
 *ref2,
   * type of the delayed backrefs and content of delayed backrefs.
   */
  static int comp_entry(struct btrfs_delayed_ref_node *ref2,
 -  struct btrfs_delayed_ref_node *ref1)
 +  struct btrfs_delayed_ref_node *ref1,
 +  bool compare_seq)
  {
  if (ref1-bytenr  ref2-bytenr)
  return -1;
 @@ -102,10 +100,12 @@ static int comp_entry(struct btrfs_delayed_ref_node 
 *ref2,
  if (ref1-type  ref2-type)
  return 1;
  /* merging of sequenced refs is not allowed */
 -if (ref1-seq  ref2-seq)
 -return -1;
 -if (ref1-seq  ref2-seq)
 -return 1;
 +if (compare_seq) {
 +if (ref1-seq  ref2-seq)
 +return -1;
 +if (ref1-seq  ref2-seq)
 +return 1;
 +}
  if (ref1-type == BTRFS_TREE_BLOCK_REF_KEY ||
  ref1-type == BTRFS_SHARED_BLOCK_REF_KEY) {
  return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
 @@ -139,7 +139,7 @@ static

Re: [RFC PATCH 4/7] Btrfs: introduce subvol uuids and times

2012-07-16 Thread Arne Jansen
On 04.07.2012 15:38, Alexander Block wrote:
 This patch introduces uuids for subvolumes. Each
 subvolume has its own uuid. In case it was snapshotted,
 it also contains parent_uuid. In case it was received,
 it also contains received_uuid.
 
 It also introduces subvolume ctime/otime/stime/rtime. The
 first two are comparable to the times found in inodes. otime
 is the origin/creation time and ctime is the change time.
 stime/rtime are only valid on received subvolumes.
 stime is the time of the subvolume when it was
 sent. rtime is the time of the subvolume when it was
 received.
 
 Additionally to the times, we have a transid for each
 time. They are updated at the same place as the times.
 
 btrfs receive uses stransid and rtransid to find out
 if a received subvolume changed in the meantime.
 
 If an older kernel mounts a filesystem with the
 extended fields, all fields become invalid. The next
 mount with a new kernel will detect this and reset the
 fields.
 
 Signed-off-by: Alexander Block abloc...@googlemail.com
 ---
  fs/btrfs/ctree.h   |   43 ++
  fs/btrfs/disk-io.c |2 +
  fs/btrfs/inode.c   |4 ++
  fs/btrfs/ioctl.c   |   96 
 ++--
  fs/btrfs/ioctl.h   |   13 +++
  fs/btrfs/root-tree.c   |   92 +++---
  fs/btrfs/transaction.c |   17 +
  7 files changed, 258 insertions(+), 9 deletions(-)
 
 diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
 index 8cfde93..2bd5df8 100644
 --- a/fs/btrfs/ctree.h
 +++ b/fs/btrfs/ctree.h
 @@ -709,6 +709,35 @@ struct btrfs_root_item {
   struct btrfs_disk_key drop_progress;
   u8 drop_level;
   u8 level;
 +
 + /*
 +  * The following fields appear after subvol_uuids+subvol_times
 +  * were introduced.
 +  */
 +
 + /*
 +  * This generation number is used to test if the new fields are valid
 +  * and up to date while reading the root item. Everytime the root item
 +  * is written out, the generation field is copied into this field. If
 +  * anyone ever mounted the fs with an older kernel, we will have
 +  * mismatching generation values here and thus must invalidate the
 +  * new fields. See btrfs_update_root and btrfs_find_last_root for
 +  * details.
 +  * the offset of generation_v2 is also used as the start for the memset
 +  * when invalidating the fields.
 +  */
 + __le64 generation_v2;
 + u8 uuid[BTRFS_UUID_SIZE];
 + u8 parent_uuid[BTRFS_UUID_SIZE];
 + u8 received_uuid[BTRFS_UUID_SIZE];
 + __le64 ctransid; /* updated when an inode changes */
 + __le64 otransid; /* trans when created */
 + __le64 stransid; /* trans when sent. non-zero for received subvol */
 + __le64 rtransid; /* trans when received. non-zero for received subvol */
 + struct btrfs_timespec ctime;
 + struct btrfs_timespec otime;
 + struct btrfs_timespec stime;
 + struct btrfs_timespec rtime;
  } __attribute__ ((__packed__));
  
  /*
 @@ -1416,6 +1445,8 @@ struct btrfs_root {
   dev_t anon_dev;
  
   int force_cow;
 +
 + spinlock_t root_times_lock;
  };
  
  struct btrfs_ioctl_defrag_range_args {
 @@ -2189,6 +2220,16 @@ BTRFS_SETGET_STACK_FUNCS(root_used, struct 
 btrfs_root_item, bytes_used, 64);
  BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
  BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
last_snapshot, 64);
 +BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item,
 +  generation_v2, 64);
 +BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item,
 +  ctransid, 64);
 +BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item,
 +  otransid, 64);
 +BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item,
 +  stransid, 64);
 +BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item,
 +  rtransid, 64);
  
  static inline bool btrfs_root_readonly(struct btrfs_root *root)
  {
 @@ -2829,6 +2870,8 @@ int btrfs_find_orphan_roots(struct btrfs_root 
 *tree_root);
  void btrfs_set_root_node(struct btrfs_root_item *item,
struct extent_buffer *node);
  void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
 +void btrfs_update_root_times(struct btrfs_trans_handle *trans,
 +  struct btrfs_root *root);
  
  /* dir-item.c */
  int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
 diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
 index 7b845ff..d3b49ad 100644
 --- a/fs/btrfs/disk-io.c
 +++ b/fs/btrfs/disk-io.c
 @@ -1182,6 +1182,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, 
 u32 sectorsize,
   root-defrag_running = 0;
   root-root_key.objectid = objectid;
   root-anon_dev = 0;
 +
 + spin_lock_init(root-root_times_lock);
  }
  
  static 

  1   2   3   4   >