[PATCH 1/5] btrfs: pass buffer extent to btrfs_free_tree_block

2010-05-11 Thread Yan, Zheng
prepare for the log code

Signed-off-by: Yan Zheng zheng@oracle.com

---
diff -urp 1/fs/btrfs/ctree.c 2/fs/btrfs/ctree.c
--- 1/fs/btrfs/ctree.c  2010-04-14 14:49:56.342950744 +0800
+++ 2/fs/btrfs/ctree.c  2010-05-11 14:00:04.122357838 +0800
@@ -279,7 +279,8 @@ int btrfs_block_can_be_shared(struct btr
 static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
   struct btrfs_root *root,
   struct extent_buffer *buf,
-  struct extent_buffer *cow)
+  struct extent_buffer *cow,
+  int *last_ref)
 {
u64 refs;
u64 owner;
@@ -365,6 +366,7 @@ static noinline int update_ref_for_cow(s
BUG_ON(ret);
}
clean_tree_block(trans, root, buf);
+   *last_ref = 1;
}
return 0;
 }
@@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(st
struct extent_buffer *cow;
int level;
int unlock_orig = 0;
+   int last_ref = 0;
u64 parent_start;
 
if (*cow_ret == buf)
@@ -441,7 +444,7 @@ static noinline int __btrfs_cow_block(st
(unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);
 
-   update_ref_for_cow(trans, root, buf, cow);
+   update_ref_for_cow(trans, root, buf, cow, last_ref);
 
if (buf == root-node) {
WARN_ON(parent  parent != buf);
@@ -456,8 +459,8 @@ static noinline int __btrfs_cow_block(st
extent_buffer_get(cow);
spin_unlock(root-node_lock);
 
-   btrfs_free_tree_block(trans, root, buf-start, buf-len,
-   parent_start, root-root_key.objectid, level);
+   btrfs_free_tree_block(trans, root, buf, parent_start,
+ last_ref);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
@@ -472,8 +475,8 @@ static noinline int __btrfs_cow_block(st
btrfs_set_node_ptr_generation(parent, parent_slot,
  trans-transid);
btrfs_mark_buffer_dirty(parent);
-   btrfs_free_tree_block(trans, root, buf-start, buf-len,
-   parent_start, root-root_key.objectid, level);
+   btrfs_free_tree_block(trans, root, buf, parent_start,
+ last_ref);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
@@ -948,6 +951,22 @@ int btrfs_bin_search(struct extent_buffe
return bin_search(eb, key, level, slot);
 }
 
+static void root_add_used(struct btrfs_root *root, u32 size)
+{
+   spin_lock(root-node_lock);
+   btrfs_set_root_used(root-root_item,
+   btrfs_root_used(root-root_item) + size);
+   spin_unlock(root-node_lock);
+}
+
+static void root_sub_used(struct btrfs_root *root, u32 size)
+{
+   spin_lock(root-node_lock);
+   btrfs_set_root_used(root-root_item,
+   btrfs_root_used(root-root_item) - size);
+   spin_unlock(root-node_lock);
+}
+
 /* given a node and slot number, this reads the blocks it points to.  The
  * extent buffer is returned with a reference taken (but unlocked).
  * NULL is returned on error.
@@ -1018,7 +1037,11 @@ static noinline int balance_level(struct
btrfs_tree_lock(child);
btrfs_set_lock_blocking(child);
ret = btrfs_cow_block(trans, root, child, mid, 0, child);
-   BUG_ON(ret);
+   if (ret) {
+   btrfs_tree_unlock(child);
+   free_extent_buffer(child);
+   goto enospc;
+   }
 
spin_lock(root-node_lock);
root-node = child;
@@ -1033,11 +1056,12 @@ static noinline int balance_level(struct
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
-   ret = btrfs_free_tree_block(trans, root, mid-start, mid-len,
-   0, root-root_key.objectid, level);
+
+   root_sub_used(root, mid-len);
+   btrfs_free_tree_block(trans, root, mid, 0, 1);
/* once for the root ptr */
free_extent_buffer(mid);
-   return ret;
+   return 0;
}
if (btrfs_header_nritems(mid) 
BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
@@ -1087,23 +,16 @@ static noinline int balance_level(struct
if (wret  0  wret != -ENOSPC)
ret = wret;
if (btrfs_header_nritems(right) == 0) {
-   u64 bytenr = right-start;
-   u32 blocksize = right-len;
-

[PATCH 3/5] btrfs: split btrfs_alloc_free_block()

2010-05-11 Thread Yan, Zheng
Split btrfs_alloc_free_block() into btrfs_reserve_tree_block()
and btrfs_alloc_reserved_tree_block().

Signed-off-by: Yan Zheng zheng@oracle.com

---
diff -urp 3/fs/btrfs/ctree.h 4/fs/btrfs/ctree.h
--- 3/fs/btrfs/ctree.h  2010-05-11 14:09:45.052107958 +0800
+++ 4/fs/btrfs/ctree.h  2010-05-11 13:15:47.060357000 +0800
@@ -1978,6 +1978,15 @@ struct btrfs_block_group_cache *btrfs_lo
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 u64 btrfs_find_block_group(struct btrfs_root *root,
   u64 search_start, u64 search_hint, int owner);
+struct extent_buffer *btrfs_reserve_tree_block(struct btrfs_trans_handle 
*trans,
+  struct btrfs_root *root,
+  u32 blocksize, int level,
+  u64 hint, u64 empty_size);
+int btrfs_alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
+   struct btrfs_root *root,
+   struct extent_buffer *buf,
+   u64 parent, u64 root_objectid,
+   struct btrfs_disk_key *key, int level);
 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u32 blocksize,
u64 parent, u64 root_objectid,
diff -urp 3/fs/btrfs/extent-tree.c 4/fs/btrfs/extent-tree.c
--- 3/fs/btrfs/extent-tree.c2010-05-11 14:12:00.044357180 +0800
+++ 4/fs/btrfs/extent-tree.c2010-05-11 13:26:38.036107000 +0800
@@ -4956,64 +4998,6 @@ int btrfs_alloc_logged_file_extent(struc
return ret;
 }
 
-/*
- * finds a free extent and does all the dirty work required for allocation
- * returns the key for the extent through ins, and a tree buffer for
- * the first block of the extent through buf.
- *
- * returns 0 if everything worked, non-zero otherwise.
- */
-static int alloc_tree_block(struct btrfs_trans_handle *trans,
-   struct btrfs_root *root,
-   u64 num_bytes, u64 parent, u64 root_objectid,
-   struct btrfs_disk_key *key, int level,
-   u64 empty_size, u64 hint_byte, u64 search_end,
-   struct btrfs_key *ins)
-{
-   int ret;
-   u64 flags = 0;
-
-   ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes,
-  empty_size, hint_byte, search_end,
-  ins, 0);
-   if (ret)
-   return ret;
-
-   if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
-   if (parent == 0)
-   parent = ins-objectid;
-   flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
-   } else
-   BUG_ON(parent  0);
-
-   if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
-   struct btrfs_delayed_extent_op *extent_op;
-   extent_op = kzalloc(sizeof(*extent_op), GFP_NOFS);
-   BUG_ON(!extent_op);
-   if (key)
-   memcpy(extent_op-key, key, sizeof(extent_op-key));
-   extent_op-flags_to_set = flags;
-   extent_op-update_key = 1;
-   extent_op-update_gen = 1;
-   extent_op-update_flags = 1;
-
-   ret = btrfs_add_delayed_tree_ref(trans, ins-objectid,
-   ins-offset, parent, root_objectid,
-   level, BTRFS_ADD_DELAYED_EXTENT,
-   extent_op);
-   BUG_ON(ret);
-   }
-
-   if (root_objectid == root-root_key.objectid) {
-   u64 used;
-   spin_lock(root-node_lock);
-   used = btrfs_root_used(root-root_item) + num_bytes;
-   btrfs_set_root_used(root-root_item, used);
-   spin_unlock(root-node_lock);
-   }
-   return ret;
-}
-
 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
@@ -5052,8 +5036,68 @@ struct extent_buffer *btrfs_init_new_buf
return buf;
 }
 
+struct extent_buffer *btrfs_reserve_tree_block(struct btrfs_trans_handle 
*trans,
+  struct btrfs_root *root,
+  u32 blocksize, int level,
+  u64 hint, u64 empty_size)
+{
+
+   struct btrfs_key ins;
+   struct extent_buffer *buf;
+   int ret;
+
+   ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
+  empty_size, hint, (u64)-1, ins, 0);
+   if (ret)
+   return ERR_PTR(ret);
+
+   buf = btrfs_init_new_buffer(trans, root, 

[PATCH 4/5] btrfs: don't cache empty block groups during mount

2010-05-11 Thread Yan, Zheng
The tree log recovery code expects no free space to be cached before it executes.

Signed-off-by: Yan Zheng zheng@oracle.com

---
diff -urp 4/fs/btrfs/extent-tree.c 8/fs/btrfs/extent-tree.c
--- 4/fs/btrfs/extent-tree.c2010-05-11 14:15:29.174108554 +0800
+++ 8/fs/btrfs/extent-tree.c2010-05-11 13:26:38.036107000 +0800
@@ -316,11 +329,6 @@ static int caching_kthread(void *data)
if (!path)
return -ENOMEM;
 
-   exclude_super_stripes(extent_root, block_group);
-   spin_lock(block_group-space_info-lock);
-   block_group-space_info-bytes_super += block_group-bytes_super;
-   spin_unlock(block_group-space_info-lock);
-
last = max_t(u64, block_group-key.objectid, BTRFS_SUPER_INFO_OFFSET);
 
/*
@@ -7499,6 +7541,7 @@ int btrfs_free_block_groups(struct btrfs
if (block_group-cached == BTRFS_CACHE_STARTED)
wait_block_group_cache_done(block_group);
 
+   free_excluded_extents(info-extent_root, block_group);
btrfs_remove_free_space_cache(block_group);
btrfs_put_block_group(block_group);
 
@@ -7586,26 +7629,12 @@ int btrfs_read_block_groups(struct btrfs
cache-flags = btrfs_block_group_flags(cache-item);
cache-sectorsize = root-sectorsize;
 
-   /*
-* check for two cases, either we are full, and therefore
-* don't need to bother with the caching work since we won't
-* find any space, or we are empty, and we can just add all
-* the space in and be done with it.  This saves us _alot_ of
-* time, particularly in the full case.
-*/
-   if (found_key.offset == btrfs_block_group_used(cache-item)) {
-   exclude_super_stripes(root, cache);
-   cache-last_byte_to_unpin = (u64)-1;
-   cache-cached = BTRFS_CACHE_FINISHED;
-   free_excluded_extents(root, cache);
-   } else if (btrfs_block_group_used(cache-item) == 0) {
-   exclude_super_stripes(root, cache);
+   exclude_super_stripes(root, cache);
+   /* check for the case that block group is full */
+   if (found_key.offset == cache-bytes_super +
+   btrfs_block_group_used(cache-item)) {
cache-last_byte_to_unpin = (u64)-1;
cache-cached = BTRFS_CACHE_FINISHED;
-   add_new_free_space(cache, root-fs_info,
-  found_key.objectid,
-  found_key.objectid +
-  found_key.offset);
free_excluded_extents(root, cache);
}
 
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: rmdir SubVolume?

2010-05-11 Thread Harshavardhana

On 05/11/2010 08:51 AM, Jay Sullivan wrote:

Hi,

I'm just experimenting with btrfs (am currently using btrfs-tools
v0.19). I'm curious about deleting subvolume (and snapshot)
directories.   What exactly is the technical reason why we need a
special command btrfsctl -D to delete these?  What's the obstacle
preventing rmdir from being used on these?

   
rmdir returns ENOTEMPTY as of now. There was a patch which I sent some
time back to return EPERM instead, to make it more intuitive. I am not sure
whether it will be included, since there is another idea of actually
implementing the btrfsctl -D part in btrfs_rmdir itself. I never got much
time to look into it.

Regards

--
Harshavardhana
Gluster Inc - http://www.gluster.com



--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/5] direct-io: honor dio-boundary a little more strictly

2010-05-11 Thread Josef Bacik
On Fri, May 07, 2010 at 01:41:04PM -0400, Josef Bacik wrote:
 Because BTRFS needs to be able to lookup checksums when we submit the bio's, 
 we
 need to be able to look up the logical offset in the inode we're submitting 
 the
 bio for.  The way we do this in our get_blocks function is to return the 
 map_bh
 with b_blocknr of the logical offset in the file, and then in the submit path
 turn that into an actual block number on the device.  This causes problems 
 with
 the DIO stuff since it will try and merge requests that look like they are
 contiguous, even though they are not actually contiguous on disk.  So BTRFS 
 sets
 buffer_boundary on the map_bh.  Unfortunately if there is not a bio already
 set up in the DIO stuff, dio->boundary gets cleared and then the next time a
 request is made they will get merged.  So instead of clearing dio->boundary in
 dio_new_bio, save the boundary value before doing anything, that way if
 dio->boundary gets cleared, we still submit the IO.  Thanks,
 
 Signed-off-by: Josef Bacik jo...@redhat.com
 ---
  fs/direct-io.c |5 -
  1 files changed, 4 insertions(+), 1 deletions(-)
 
 diff --git a/fs/direct-io.c b/fs/direct-io.c
 index 2dbf2e9..98f6f42 100644
 --- a/fs/direct-io.c
 +++ b/fs/direct-io.c
 @@ -615,6 +615,7 @@ static int dio_bio_add_page(struct dio *dio)
   */
  static int dio_send_cur_page(struct dio *dio)
  {
 + int boundary = dio-boundary;
   int ret = 0;
  
   if (dio-bio) {
 @@ -627,7 +628,7 @@ static int dio_send_cur_page(struct dio *dio)
* Submit now if the underlying fs is about to perform a
* metadata read
*/
 - if (dio-boundary)
 + if (boundary)
   dio_bio_submit(dio);
   }
  
 @@ -644,6 +645,8 @@ static int dio_send_cur_page(struct dio *dio)
   ret = dio_bio_add_page(dio);
   BUG_ON(ret != 0);
   }
 + } else if (boundary) {
 + dio_bio_submit(dio);
   }
  out:
   return ret;

Self-NACK on this one.  Seems to have an unwanted side-effect of forcing every
page to be submitted individually.  I'm going to fix this a different way.
Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Disk space accounting and subvolume delete

2010-05-11 Thread Yan, Zheng
On Tue, May 11, 2010 at 11:45 PM, Bruce Guenter br...@untroubled.org wrote:
 On Tue, May 11, 2010 at 08:10:38AM +0800, Yan, Zheng  wrote:
 This is because the snapshot deleting ioctl only removes a link.

 Right, I understand that.  That part is not unexpected, as it works just
 like unlink would.  However...

 The corresponding tree is dropped in the background by a kernel thread.

 The surprise is that 'sync', in any form I was able to try, does not
 wait until all or even most of the I/O is completed.  Apparently the
 standards spec for sync(2) says it is not required to wait for I/O to
 complete, but AFAIK all other Linux FS do wait (the man page for sync(2)
 implies as much, as does the info page for sync in glibc).

 The only way I've found so far to force this behavior is to unmount, and
 that's rather intrusive to other users of the FS.

 We could probably add another ioctl that waits until the tree has been
 completely dropped.

 Since the expected behavior for sync is to wait until all pending I/O
 has been completed, I would argue this should be the default action for
 sync.  Am I misunderstanding something?


Dropping a tree can be lengthy. It's not good to let sync wait for hours.
For most Linux filesystems, 'sync' just forces a transaction/journal commit. I
don't think they wait for large operations that can span multiple transactions
to complete.

Yan, Zheng
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html