Here is a better patch that doesn't deadlock (the last one did) and
uses the more proper generic_sync_sb_inodes instead of freeze_bdev.

I used the make-many-files.c program that Marcin dug up somewhere on
the net to test this, making the partition 4 GB, which is a little
too small to hold all the files.  This exercises the nospace handling
nicely.

Note: one thing that freeze_bdev does that generic_sync_sb_inodes does
not is prevent new writes during the flush.  We will have to think
about how this is to be handled.

Daniel
diff -r a49705ea1c95 user/kernel/commit.c
--- a/user/kernel/commit.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/commit.c	Wed Mar 04 03:26:33 2009 -0800
@@ -4,6 +4,7 @@
  */
 
 #include "tux3.h"
+#include <linux/writeback.h>
 
 #ifndef trace
 #define trace trace_on
@@ -32,6 +33,8 @@
 	sb->atomgen = from_be_u32(super->atomgen);
 	sb->freeatom = from_be_u32(super->freeatom);
 	sb->dictsize = from_be_u64(super->dictsize);
+	sb->minchange = 8; /* total blocks changed by smallest change */
+	sb->margin = 100; // should be tunable?
 	trace("blocksize %u, blockbits %u, blockmask %08x",
 	      sb->blocksize, sb->blockbits, sb->blockmask);
 	trace("volblocks %Lu, freeblocks %Lu, nextalloc %Lu",
@@ -236,6 +239,50 @@
 	return 0;
 }
 
+/* Charge @credits blocks to sb->credits; in-kernel, low space forces one inode flush. Returns 0 or -ENOSPC. */
+int reserve_credits(struct sb *sb, unsigned credits)
+{
+	sb->credits += credits;
+	if (sb->margin > 0 && sb->freeblocks < sb->credits + sb->margin) {
+#ifdef __KERNEL__
+		warn(">>> %Lx free, %Lx credits", (L)sb->freeblocks, (L)sb->credits);
+		sb->margin = -sb->margin; /* negative margin: any reserve_credits() call during the flush skips this branch */
+		generic_sync_sb_inodes(vfs_sb(sb), &(struct writeback_control){
+			.sync_mode = WB_SYNC_ALL,
+			.range_end = LLONG_MAX,
+			.nr_to_write = LONG_MAX });
+		sb->credits = 0;
+		/* No thaw_bdev() here: this path never calls freeze_bdev(), so there is nothing to thaw */
+		sb->margin = -sb->margin;
+#endif
+		warn(">>> nospace = %i", sb->freeblocks < sb->credits + sb->margin);
+		if (sb->freeblocks < sb->credits + sb->margin) {
+			return -ENOSPC;
+		}
+	}
+	return 0;
+}
+
+void release_credits(struct sb *sb, unsigned credits)
+{
+	/* TODO: return unused credits to the pool; currently an empty stub that ignores both arguments */
+}
+
+/* Reserve sb->minchange credits, then open a change; returns the first error hit, else change_begin()'s result. */
+int reserve_begin(struct sb *sb)
+{
+	int err = reserve_credits(sb, sb->minchange);
+
+	return err ? err : change_begin(sb);
+}
+
+/* Open a change while charging zero new credits; reserve_credits() still runs its low-space check. */
+int reduce_begin(struct sb *sb)
+{
+	int err = reserve_credits(sb, 0);
+
+	return err ? err : change_begin(sb);
+}
 #ifdef __KERNEL__
 static void *useme[] = { clean_buffer, need_delta, stage_delta, commit_delta, useme };
 #endif
diff -r a49705ea1c95 user/kernel/dir.c
--- a/user/kernel/dir.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/dir.c	Wed Mar 04 03:26:33 2009 -0800
@@ -132,7 +132,7 @@
 		while (entry <= limit) {
 			if (entry->rec_len == 0) {
 				brelse(buffer);
-				tux_error(dir->i_sb, "zero-length directory entry");
+				warn("[CORRUPTION] Zero length directory record detected!");
 				return -EIO;
 			}
 			name_len = TUX_REC_LEN(entry->name_len);
diff -r a49705ea1c95 user/kernel/filemap.c
--- a/user/kernel/filemap.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/filemap.c	Wed Mar 04 03:26:33 2009 -0800
@@ -511,7 +511,11 @@
 			       loff_t pos, unsigned len, unsigned flags,
 			       struct page **pagep, void **fsdata)
 {
+	struct sb *sb = tux_sb(mapping->host->i_sb);
+	int err;
 	*pagep = NULL;
+	if ((err = reserve_credits(sb, (len >> sb->blockbits) + 10)))
+		return err;
 	return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
 				 tux3_da_get_block);
 }
@@ -519,8 +523,10 @@
 static int tux3_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct sb *sb = tux_sb(page->mapping->host->i_sb);
-	change_begin(sb);
-	int err = block_write_full_page(page, tux3_get_block, wbc);
+	int err = change_begin(sb); /* change_begin() failure is now propagated instead of ignored */
+	if (err)
+		return err; /* NOTE(review): returns without unlock_page(page) — confirm the ->writepage callers accept that */
+	err = block_write_full_page(page, tux3_get_block, wbc);
 	change_end(sb);
 	return err;
 }
diff -r a49705ea1c95 user/kernel/inode.c
--- a/user/kernel/inode.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/inode.c	Wed Mar 04 03:26:33 2009 -0800
@@ -233,9 +233,10 @@
 	trace("save inode 0x%Lx", (L)tux_inode(inode)->inum);
 	struct sb *sb = tux_sb(inode->i_sb);
 	struct btree *itable = itable_btree(sb);
+	struct cursor *cursor;
 	int err;
-	struct cursor *cursor = alloc_cursor(itable, 1); /* +1 for new depth */
-	if (!cursor)
+
+	if (!(cursor = alloc_cursor(itable, 1))) /* +1 for new depth */
 		return -ENOMEM;
 	down_write(&cursor->btree->lock);
 	if ((err = probe(cursor, tux_inode(inode)->inum)))
diff -r a49705ea1c95 user/kernel/namei.c
--- a/user/kernel/namei.c	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/namei.c	Wed Mar 04 03:26:33 2009 -0800
@@ -60,13 +60,14 @@
 
 static int tux3_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
 {
+	struct sb *sb = tux_sb(dir->i_sb);
 	struct inode *inode;
 	int err;
 
 	if (!huge_valid_dev(rdev))
 		return -EINVAL;
-
-	change_begin(tux_sb(dir->i_sb));
+	if ((err = reserve_begin(sb)))
+		return err;
 	inode = tux_create_inode(dir, mode, rdev);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
@@ -81,7 +82,7 @@
 		iput(inode);
 	}
 out:
-	change_end(tux_sb(dir->i_sb));
+	change_end(sb);
 	return err;
 }
 
@@ -100,13 +101,14 @@
 static int tux3_link(struct dentry *old_dentry, struct inode *dir,
 		     struct dentry *dentry)
 {
+	struct sb *sb = tux_sb(dir->i_sb);
 	struct inode *inode = old_dentry->d_inode;
 	int err;
 
 	if (inode->i_nlink >= TUX_LINK_MAX)
 		return -EMLINK;
-
-	change_begin(tux_sb(inode->i_sb));
+	if ((err = reserve_begin(sb)))
+		return err;
 	inode->i_ctime = gettime();
 	inode_inc_link_count(inode);
 	atomic_inc(&inode->i_count);
@@ -115,17 +117,19 @@
 		inode_dec_link_count(inode);
 		iput(inode);
 	}
-	change_end(tux_sb(inode->i_sb));
+	change_end(sb);
 	return err;
 }
 
 static int tux3_symlink(struct inode *dir, struct dentry *dentry,
 			const char *symname)
 {
+	struct sb *sb = tux_sb(dir->i_sb);
 	struct inode *inode;
 	int err;
 
-	change_begin(tux_sb(dir->i_sb));
+	if ((err = reserve_begin(sb)))
+		return err;
 	inode = tux_create_inode(dir, S_IFLNK | S_IRWXUGO, 0);
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
@@ -139,41 +143,42 @@
 		iput(inode);
 	}
 out:
-	change_end(tux_sb(dir->i_sb));
+	change_end(sb);
 	return err;
 }
 
 static int tux3_unlink(struct inode *dir, struct dentry *dentry)
 {
+	struct sb *sb = tux_sb(dir->i_sb);
 	struct inode *inode = dentry->d_inode;
-	change_begin(tux_sb(inode->i_sb));
-	int err = tux_del_dirent(dir, dentry);
-	if (!err) {
+	int err;
+
+	if ((err = reduce_begin(sb)))
+		return err;
+	if (!(err = tux_del_dirent(dir, dentry))) { /* keep the result so a failed delete reaches the caller */
 		inode->i_ctime = dir->i_ctime;
 		inode_dec_link_count(inode);
 	}
-	change_end(tux_sb(inode->i_sb));
+	change_end(sb);
 	return err;
 }
 
 static int tux3_rmdir(struct inode *dir, struct dentry *dentry)
 {
+	struct sb *sb = tux_sb(dir->i_sb);
 	struct inode *inode = dentry->d_inode;
 	int err;
 
-	err = tux_dir_is_empty(inode);
-	if (!err) {
-		change_begin(tux_sb(inode->i_sb));
-		err = tux_del_dirent(dir, dentry);
-		if (!err) {
-			inode->i_ctime = dir->i_ctime;
-			inode->i_size = 0;
-			clear_nlink(inode);
-			mark_inode_dirty(inode);
-			inode_dec_link_count(dir);
-		}
-		change_end(tux_sb(inode->i_sb));
+	if ((err = tux_dir_is_empty(inode)) || (err = reduce_begin(sb))) /* keep the emptiness guard the old code had */
+		return err;
+	if (!(err = tux_del_dirent(dir, dentry))) { /* keep the result so a failed delete reaches the caller */
+		inode->i_ctime = dir->i_ctime;
+		inode->i_size = 0;
+		clear_nlink(inode);
+		mark_inode_dirty(inode);
+		inode_dec_link_count(dir);
 	}
+	change_end(sb);
 	return err;
 }
 
@@ -184,6 +189,7 @@
 	struct inode *new_inode = new_dentry->d_inode;
 	struct buffer_head *old_buffer, *new_buffer;
 	tux_dirent *old_entry, *new_entry;
+	struct sb *sb = tux_sb(old_dir->i_sb);
 	int err, new_subdir = 0;
 
 	old_entry = tux_find_entry(old_dir, old_dentry->d_name.name,
@@ -194,7 +200,8 @@
 	/* FIXME: is this needed? */
 	BUG_ON(from_be_u64(old_entry->inum) != tux_inode(old_inode)->inum);
 
-	change_begin(tux_sb(old_inode->i_sb));
+	if ((err = reserve_begin(sb)))
+		return err;
 	if (new_inode) {
 		int old_is_dir = S_ISDIR(old_inode->i_mode);
 		if (old_is_dir) {
@@ -244,11 +251,11 @@
 	if (!err && new_subdir)
 		inode_dec_link_count(old_dir);
 
-	change_end(tux_sb(old_inode->i_sb));
+	change_end(sb);
 	return err;
 
 error:
-	change_end(tux_sb(old_inode->i_sb));
+	change_end(sb);
 	brelse(old_buffer);
 	return err;
 }
diff -r a49705ea1c95 user/kernel/tux3.h
--- a/user/kernel/tux3.h	Tue Mar 03 20:43:11 2009 -0800
+++ b/user/kernel/tux3.h	Wed Mar 04 03:26:33 2009 -0800
@@ -337,6 +337,9 @@
 	struct list_head pinned; /* dirty metadata not flushed per delta */
 	struct list_head commit; /* dirty metadata flushed per delta */
 	struct list_head dirty_inodes;	/* dirty inodes list */
+	unsigned credits;	/* Blocks reserved for inflight operations */
+	unsigned minchange;	/* total blocks changed by smallest change */
+	int margin;		/* ENOSPC when freeblocks less than this */
 #ifdef __KERNEL__
 	struct super_block *vfs_sb; /* Generic kernel superblock */
 #else
@@ -900,6 +903,9 @@
 int load_itable(struct sb *sb);
 int change_begin(struct sb *sb);
 int change_end(struct sb *sb);
+int reserve_credits(struct sb *sb, unsigned credits);
+int reserve_begin(struct sb *sb);
+int reduce_begin(struct sb *sb);
 
 /* temporary hack for buffer */
 struct buffer_head *blockread(struct address_space *mapping, block_t iblock);
_______________________________________________
Tux3 mailing list
Tux3@tux3.org
http://mailman.tux3.org/cgi-bin/mailman/listinfo/tux3

Reply via email to