Re: [Tux3] Patch: Preliminary attempt at nospace handling

2009-03-04 Thread Daniel Phillips
Here is a better patch that doesn't deadlock (the last one did) and
uses the more proper generic_sync_sb_inodes instead of freeze_bdev.

To test this I used the make-many-files.c program that Marcin dug up
somewhere on the net, on a 4 GB partition, which is a little too small
to hold all the files.  This exercises the nospace handling nicely.

Note: unlike generic_sync_sb_inodes, freeze_bdev prevents new writes
during the flush.  We will have to think about how to handle this.

Daniel
diff -r a49705ea1c95 user/kernel/commit.c
--- a/user/kernel/commit.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/commit.c	Wed Mar 04 03:26:33 2009 -0800
@@ -4,6 +4,7 @@
  */
 
 #include "tux3.h"
+#include <linux/writeback.h>
 
 #ifndef trace
 #define trace trace_on
@@ -32,6 +33,8 @@
 	sb-atomgen = from_be_u32(super-atomgen);
 	sb-freeatom = from_be_u32(super-freeatom);
 	sb-dictsize = from_be_u64(super-dictsize);
+	sb-minchange = 8; /* total blocks changed by smallest change */
+	sb-margin = 100; // should be tunable?
 	trace(blocksize %u, blockbits %u, blockmask %08x,
 	  sb-blocksize, sb-blockbits, sb-blockmask);
 	trace(volblocks %Lu, freeblocks %Lu, nextalloc %Lu,
@@ -236,6 +239,50 @@
 	return 0;
 }
 
+int reserve_credits(struct sb *sb, unsigned credits)
+{
+	sb-credits += credits;
+	if (sb->margin > 0 && sb->freeblocks < sb->credits + sb->margin) {
+#ifdef __KERNEL__
+		struct block_device *bdev = vfs_sb(sb)-s_bdev;
+		warn( %Lx free, %Lx credits, (L)sb-freeblocks, (L)sb-credits);
+		sb-margin = -sb-margin;
+		generic_sync_sb_inodes(vfs_sb(sb), (struct writeback_control){
+			.sync_mode = WB_SYNC_ALL,
+			.range_end = LLONG_MAX,
+			.nr_to_write = LONG_MAX });
+		sb-credits = 0;
+		thaw_bdev(bdev, vfs_sb(sb));
+		sb-margin = -sb-margin;
+#endif
+		warn( nospace = %i, sb-freeblocks  sb-credits + sb-margin);
+		if (sb-freeblocks  sb-credits + sb-margin) {
+			return -ENOSPC;
+		}
+	}
+	return 0;
+}
+
+void release_credits(struct sb *sb, unsigned credits)
+{
+	// try to return unused credits to pool //
+}
+
+int reserve_begin(struct sb *sb)
+{
+	int err;
+	if ((err = reserve_credits(sb, sb-minchange)))
+		return err;
+	return change_begin(sb);
+}
+
+int reduce_begin(struct sb *sb)
+{
+	int err;
+	if ((err = reserve_credits(sb, 0)))
+		return err;
+	return change_begin(sb);
+}
 #ifdef __KERNEL__
 static void *useme[] = { clean_buffer, need_delta, stage_delta, commit_delta, useme };
 #endif
diff -r a49705ea1c95 user/kernel/dir.c
--- a/user/kernel/dir.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/dir.c	Wed Mar 04 03:26:33 2009 -0800
@@ -132,7 +132,7 @@
 		while (entry = limit) {
 			if (entry-rec_len == 0) {
 brelse(buffer);
-tux_error(dir-i_sb, zero-length directory entry);
+warn([CORRUPTION] Zero length directory record detected!);
 return -EIO;
 			}
 			name_len = TUX_REC_LEN(entry-name_len);
diff -r a49705ea1c95 user/kernel/filemap.c
--- a/user/kernel/filemap.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/filemap.c	Wed Mar 04 03:26:33 2009 -0800
@@ -511,7 +511,11 @@
 			   loff_t pos, unsigned len, unsigned flags,
 			   struct page **pagep, void **fsdata)
 {
+	struct sb *sb = tux_sb(mapping-host-i_sb);
+	int err;
 	*pagep = NULL;
+	if ((err = reserve_credits(sb, (len >> sb->blockbits) + 10)))
+		return err;
 	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
  tux3_da_get_block);
 }
@@ -519,8 +523,10 @@
 static int tux3_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct sb *sb = tux_sb(page-mapping-host-i_sb);
-	change_begin(sb);
-	int err = block_write_full_page(page, tux3_get_block, wbc);
+	int err = change_begin(sb);
+	if (err)
+		return err;
+	err = block_write_full_page(page, tux3_get_block, wbc);
 	change_end(sb);
 	return err;
 }
diff -r a49705ea1c95 user/kernel/inode.c
--- a/user/kernel/inode.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/inode.c	Wed Mar 04 03:26:33 2009 -0800
@@ -233,9 +233,10 @@
 	trace(save inode 0x%Lx, (L)tux_inode(inode)-inum);
 	struct sb *sb = tux_sb(inode-i_sb);
 	struct btree *itable = itable_btree(sb);
+	struct cursor *cursor;
 	int err;
-	struct cursor *cursor = alloc_cursor(itable, 1); /* +1 for new depth */
-	if (!cursor)
+
+	if (!(cursor = alloc_cursor(itable, 1))) /* +1 for new depth */
 		return -ENOMEM;
 	down_write(cursor-btree-lock);
 	if ((err = probe(cursor, tux_inode(inode)-inum)))
diff -r a49705ea1c95 user/kernel/namei.c
--- a/user/kernel/namei.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/namei.c	Wed Mar 04 03:26:33 2009 -0800
@@ -60,13 +60,14 @@
 
 static int tux3_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 {
+	struct sb *sb = tux_sb(dir-i_sb);
 	struct inode *inode;
 	int err;
 
 	if (!huge_valid_dev(rdev))
 		return -EINVAL;
-
-	change_begin(tux_sb(dir-i_sb));
+	if ((err = reserve_begin(sb)))
+		return err;
 	inode = tux_create_inode(dir, mode, rdev);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
@@ -81,7 +82,7 @@
 		

Re: [Tux3] Patch: Preliminary attempt at nospace handling

2009-03-04 Thread Daniel Phillips
Another small adjustment to make rm -r work without failing on nospace.

Regards,

Daniel
diff -r a49705ea1c95 user/kernel/commit.c
--- a/user/kernel/commit.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/commit.c	Wed Mar 04 18:17:05 2009 -0800
@@ -32,6 +32,8 @@ int load_sb(struct sb *sb)
 	sb-atomgen = from_be_u32(super-atomgen);
 	sb-freeatom = from_be_u32(super-freeatom);
 	sb-dictsize = from_be_u64(super-dictsize);
+	sb-minchange = 8; /* total blocks changed by smallest change */
+	sb-margin = 100; // should be tunable?
 	trace(blocksize %u, blockbits %u, blockmask %08x,
 	  sb-blocksize, sb-blockbits, sb-blockmask);
 	trace(volblocks %Lu, freeblocks %Lu, nextalloc %Lu,
@@ -236,6 +238,50 @@ int change_end(struct sb *sb)
 	return 0;
 }
 
+int reserve_credits(struct sb *sb, unsigned credits)
+{
+	sb-credits += credits;
+	if (sb->margin > 0 && sb->freeblocks < sb->credits + sb->margin) {
+#ifdef __KERNEL__
+		struct block_device *bdev = vfs_sb(sb)-s_bdev;
+		warn( %Lx free, %Lx credits, (L)sb-freeblocks, (L)sb-credits);
+		sb-margin = -sb-margin;
+		generic_sync_sb_inodes(vfs_sb(sb), (struct writeback_control){
+			.sync_mode = WB_SYNC_ALL,
+			.range_end = LLONG_MAX,
+			.nr_to_write = LONG_MAX });
+		sb-credits = 0;
+		thaw_bdev(bdev, vfs_sb(sb));
+		sb-margin = -sb-margin;
+#endif
+		warn( nospace = %i, sb-freeblocks  sb-credits + sb-margin);
+		if (sb-freeblocks  sb-credits + sb-margin) {
+			return -ENOSPC;
+		}
+	}
+	return 0;
+}
+
+void release_credits(struct sb *sb, unsigned credits)
+{
+	// try to return unused credits to pool //
+}
+
+int reserve_begin(struct sb *sb)
+{
+	int err;
+	if ((err = reserve_credits(sb, sb-minchange)))
+		return err;
+	return change_begin(sb);
+}
+
+int reduce_begin(struct sb *sb)
+{
+//	int err;
+//	if ((err = reserve_credits(sb, 0)))
+//		return err;
+	return change_begin(sb);
+}
 #ifdef __KERNEL__
 static void *useme[] = { clean_buffer, need_delta, stage_delta, commit_delta, useme };
 #endif
diff -r a49705ea1c95 user/kernel/dir.c
--- a/user/kernel/dir.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/dir.c	Wed Mar 04 18:17:05 2009 -0800
@@ -132,7 +132,7 @@ loff_t _tux_create_entry(struct inode *d
 		while (entry = limit) {
 			if (entry-rec_len == 0) {
 brelse(buffer);
-tux_error(dir-i_sb, zero-length directory entry);
+warn([CORRUPTION] Zero length directory record detected!);
 return -EIO;
 			}
 			name_len = TUX_REC_LEN(entry-name_len);
diff -r a49705ea1c95 user/kernel/filemap.c
--- a/user/kernel/filemap.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/filemap.c	Wed Mar 04 18:17:05 2009 -0800
@@ -511,7 +511,11 @@ static int tux3_da_write_begin(struct fi
 			   loff_t pos, unsigned len, unsigned flags,
 			   struct page **pagep, void **fsdata)
 {
+	struct sb *sb = tux_sb(mapping-host-i_sb);
+	int err;
 	*pagep = NULL;
+	if ((err = reserve_credits(sb, (len >> sb->blockbits) + 10)))
+		return err;
 	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
  tux3_da_get_block);
 }
@@ -519,8 +523,10 @@ static int tux3_writepage(struct page *p
 static int tux3_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct sb *sb = tux_sb(page-mapping-host-i_sb);
-	change_begin(sb);
-	int err = block_write_full_page(page, tux3_get_block, wbc);
+	int err = change_begin(sb);
+	if (err)
+		return err;
+	err = block_write_full_page(page, tux3_get_block, wbc);
 	change_end(sb);
 	return err;
 }
diff -r a49705ea1c95 user/kernel/inode.c
--- a/user/kernel/inode.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/inode.c	Wed Mar 04 18:17:05 2009 -0800
@@ -233,9 +233,10 @@ static int save_inode(struct inode *inod
 	trace(save inode 0x%Lx, (L)tux_inode(inode)-inum);
 	struct sb *sb = tux_sb(inode-i_sb);
 	struct btree *itable = itable_btree(sb);
+	struct cursor *cursor;
 	int err;
-	struct cursor *cursor = alloc_cursor(itable, 1); /* +1 for new depth */
-	if (!cursor)
+
+	if (!(cursor = alloc_cursor(itable, 1))) /* +1 for new depth */
 		return -ENOMEM;
 	down_write(cursor-btree-lock);
 	if ((err = probe(cursor, tux_inode(inode)-inum)))
diff -r a49705ea1c95 user/kernel/namei.c
--- a/user/kernel/namei.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/namei.c	Wed Mar 04 18:17:05 2009 -0800
@@ -60,13 +60,14 @@ static int tux_del_dirent(struct inode *
 
 static int tux3_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 {
+	struct sb *sb = tux_sb(dir-i_sb);
 	struct inode *inode;
 	int err;
 
 	if (!huge_valid_dev(rdev))
 		return -EINVAL;
-
-	change_begin(tux_sb(dir-i_sb));
+	if ((err = reserve_begin(sb)))
+		return err;
 	inode = tux_create_inode(dir, mode, rdev);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
@@ -81,7 +82,7 @@ static int tux3_mknod(struct inode *dir,
 		iput(inode);
 	}
 out:
-	change_end(tux_sb(dir-i_sb));
+	change_end(sb);
 	return err;
 }
 
@@ -100,13 +101,14 @@ static int tux3_link(struct dentry *old_
 static int tux3_link(struct dentry *old_dentry, struct inode *dir,
 		 struct