Re: [Tux3] Patch: Preliminary attempt at nospace handling
Here is a better patch that doesn't deadlock (the last one did) and uses the more proper generic_sync_sb_inodes instead of freeze_bdev. I used the make-many-files.c program that Marcin dug up somewhere on the net to test this, making the partition 4 GB, which is a little too small to hold all the files. This exercises the nospace handling nicely. Note: one thing that freeze_bdev does that generic_sync_sb_inodes does not is prevent new writes during the flush. We will have to think about how this is to be handled. Daniel diff -r a49705ea1c95 user/kernel/commit.c --- a/user/kernel/commit.c Tue Mar 03 20:43:11 2009 -0800 +++ b/user/kernel/commit.c Wed Mar 04 03:26:33 2009 -0800 @@ -4,6 +4,7 @@ */ #include "tux3.h" +#include <linux/writeback.h> #ifndef trace #define trace trace_on @@ -32,6 +33,8 @@ sb->atomgen = from_be_u32(super->atomgen); sb->freeatom = from_be_u32(super->freeatom); sb->dictsize = from_be_u64(super->dictsize); + sb->minchange = 8; /* total blocks changed by smallest change */ + sb->margin = 100; // should be tunable? 
trace(blocksize %u, blockbits %u, blockmask %08x, sb-blocksize, sb-blockbits, sb-blockmask); trace(volblocks %Lu, freeblocks %Lu, nextalloc %Lu, @@ -236,6 +239,50 @@ return 0; } +int reserve_credits(struct sb *sb, unsigned credits) +{ + sb-credits += credits; + if (sb-margin 0 sb-freeblocks sb-credits + sb-margin) { +#ifdef __KERNEL__ + struct block_device *bdev = vfs_sb(sb)-s_bdev; + warn( %Lx free, %Lx credits, (L)sb-freeblocks, (L)sb-credits); + sb-margin = -sb-margin; + generic_sync_sb_inodes(vfs_sb(sb), (struct writeback_control){ + .sync_mode = WB_SYNC_ALL, + .range_end = LLONG_MAX, + .nr_to_write = LONG_MAX }); + sb-credits = 0; + thaw_bdev(bdev, vfs_sb(sb)); + sb-margin = -sb-margin; +#endif + warn( nospace = %i, sb-freeblocks sb-credits + sb-margin); + if (sb-freeblocks sb-credits + sb-margin) { + return -ENOSPC; + } + } + return 0; +} + +void release_credits(struct sb *sb, unsigned credits) +{ + // try to return unused credits to pool // +} + +int reserve_begin(struct sb *sb) +{ + int err; + if ((err = reserve_credits(sb, sb-minchange))) + return err; + return change_begin(sb); +} + +int reduce_begin(struct sb *sb) +{ + int err; + if ((err = reserve_credits(sb, 0))) + return err; + return change_begin(sb); +} #ifdef __KERNEL__ static void *useme[] = { clean_buffer, need_delta, stage_delta, commit_delta, useme }; #endif diff -r a49705ea1c95 user/kernel/dir.c --- a/user/kernel/dir.c Tue Mar 03 20:43:11 2009 -0800 +++ b/user/kernel/dir.c Wed Mar 04 03:26:33 2009 -0800 @@ -132,7 +132,7 @@ while (entry = limit) { if (entry-rec_len == 0) { brelse(buffer); -tux_error(dir-i_sb, zero-length directory entry); +warn([CORRUPTION] Zero length directory record detected!); return -EIO; } name_len = TUX_REC_LEN(entry-name_len); diff -r a49705ea1c95 user/kernel/filemap.c --- a/user/kernel/filemap.c Tue Mar 03 20:43:11 2009 -0800 +++ b/user/kernel/filemap.c Wed Mar 04 03:26:33 2009 -0800 @@ -511,7 +511,11 @@ loff_t pos, unsigned len, unsigned flags, struct page **pagep, 
void **fsdata) { + struct sb *sb = tux_sb(mapping-host-i_sb); + int err; *pagep = NULL; + if ((err = reserve_credits(sb, (len sb-blockbits) + 10))) + return err; return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, tux3_da_get_block); } @@ -519,8 +523,10 @@ static int tux3_writepage(struct page *page, struct writeback_control *wbc) { struct sb *sb = tux_sb(page-mapping-host-i_sb); - change_begin(sb); - int err = block_write_full_page(page, tux3_get_block, wbc); + int err = change_begin(sb); + if (err) + return err; + err = block_write_full_page(page, tux3_get_block, wbc); change_end(sb); return err; } diff -r a49705ea1c95 user/kernel/inode.c --- a/user/kernel/inode.c Tue Mar 03 20:43:11 2009 -0800 +++ b/user/kernel/inode.c Wed Mar 04 03:26:33 2009 -0800 @@ -233,9 +233,10 @@ trace(save inode 0x%Lx, (L)tux_inode(inode)-inum); struct sb *sb = tux_sb(inode-i_sb); struct btree *itable = itable_btree(sb); + struct cursor *cursor; int err; - struct cursor *cursor = alloc_cursor(itable, 1); /* +1 for new depth */ - if (!cursor) + + if (!(cursor = alloc_cursor(itable, 1))) /* +1 for new depth */ return -ENOMEM; down_write(cursor-btree-lock); if ((err = probe(cursor, tux_inode(inode)-inum))) diff -r a49705ea1c95 user/kernel/namei.c --- a/user/kernel/namei.c Tue Mar 03 20:43:11 2009 -0800 +++ b/user/kernel/namei.c Wed Mar 04 03:26:33 2009 -0800 @@ -60,13 +60,14 @@ static int tux3_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) { + struct sb *sb = tux_sb(dir-i_sb); struct inode *inode; int err; if (!huge_valid_dev(rdev)) return -EINVAL; - - change_begin(tux_sb(dir-i_sb)); + if ((err = reserve_begin(sb))) + return err; inode = tux_create_inode(dir, mode, rdev); err = PTR_ERR(inode); if (!IS_ERR(inode)) { @@ -81,7 +82,7 @@
Re: [Tux3] Patch: Preliminary attempt at nospace handling
Another small adjustment to make rm -r work without failing on nospace. Regards, Daniel diff -r a49705ea1c95 user/kernel/commit.c --- a/user/kernel/commit.c Tue Mar 03 20:43:11 2009 -0800 +++ b/user/kernel/commit.c Wed Mar 04 18:17:05 2009 -0800 @@ -32,6 +32,8 @@ int load_sb(struct sb *sb) sb-atomgen = from_be_u32(super-atomgen); sb-freeatom = from_be_u32(super-freeatom); sb-dictsize = from_be_u64(super-dictsize); + sb-minchange = 8; /* total blocks changed by smallest change */ + sb-margin = 100; // should be tunable? trace(blocksize %u, blockbits %u, blockmask %08x, sb-blocksize, sb-blockbits, sb-blockmask); trace(volblocks %Lu, freeblocks %Lu, nextalloc %Lu, @@ -236,6 +238,50 @@ int change_end(struct sb *sb) return 0; } +int reserve_credits(struct sb *sb, unsigned credits) +{ + sb-credits += credits; + if (sb-margin 0 sb-freeblocks sb-credits + sb-margin) { +#ifdef __KERNEL__ + struct block_device *bdev = vfs_sb(sb)-s_bdev; + warn( %Lx free, %Lx credits, (L)sb-freeblocks, (L)sb-credits); + sb-margin = -sb-margin; + generic_sync_sb_inodes(vfs_sb(sb), (struct writeback_control){ + .sync_mode = WB_SYNC_ALL, + .range_end = LLONG_MAX, + .nr_to_write = LONG_MAX }); + sb-credits = 0; + thaw_bdev(bdev, vfs_sb(sb)); + sb-margin = -sb-margin; +#endif + warn( nospace = %i, sb-freeblocks sb-credits + sb-margin); + if (sb-freeblocks sb-credits + sb-margin) { + return -ENOSPC; + } + } + return 0; +} + +void release_credits(struct sb *sb, unsigned credits) +{ + // try to return unused credits to pool // +} + +int reserve_begin(struct sb *sb) +{ + int err; + if ((err = reserve_credits(sb, sb-minchange))) + return err; + return change_begin(sb); +} + +int reduce_begin(struct sb *sb) +{ +// int err; +// if ((err = reserve_credits(sb, 0))) +// return err; + return change_begin(sb); +} #ifdef __KERNEL__ static void *useme[] = { clean_buffer, need_delta, stage_delta, commit_delta, useme }; #endif diff -r a49705ea1c95 user/kernel/dir.c --- a/user/kernel/dir.c Tue Mar 03 20:43:11 2009 
-0800 +++ b/user/kernel/dir.c Wed Mar 04 18:17:05 2009 -0800 @@ -132,7 +132,7 @@ loff_t _tux_create_entry(struct inode *d while (entry = limit) { if (entry-rec_len == 0) { brelse(buffer); -tux_error(dir-i_sb, zero-length directory entry); +warn([CORRUPTION] Zero length directory record detected!); return -EIO; } name_len = TUX_REC_LEN(entry-name_len); diff -r a49705ea1c95 user/kernel/filemap.c --- a/user/kernel/filemap.c Tue Mar 03 20:43:11 2009 -0800 +++ b/user/kernel/filemap.c Wed Mar 04 18:17:05 2009 -0800 @@ -511,7 +511,11 @@ static int tux3_da_write_begin(struct fi loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + struct sb *sb = tux_sb(mapping-host-i_sb); + int err; *pagep = NULL; + if ((err = reserve_credits(sb, (len sb-blockbits) + 10))) + return err; return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, tux3_da_get_block); } @@ -519,8 +523,10 @@ static int tux3_writepage(struct page *p static int tux3_writepage(struct page *page, struct writeback_control *wbc) { struct sb *sb = tux_sb(page-mapping-host-i_sb); - change_begin(sb); - int err = block_write_full_page(page, tux3_get_block, wbc); + int err = change_begin(sb); + if (err) + return err; + err = block_write_full_page(page, tux3_get_block, wbc); change_end(sb); return err; } diff -r a49705ea1c95 user/kernel/inode.c --- a/user/kernel/inode.c Tue Mar 03 20:43:11 2009 -0800 +++ b/user/kernel/inode.c Wed Mar 04 18:17:05 2009 -0800 @@ -233,9 +233,10 @@ static int save_inode(struct inode *inod trace(save inode 0x%Lx, (L)tux_inode(inode)-inum); struct sb *sb = tux_sb(inode-i_sb); struct btree *itable = itable_btree(sb); + struct cursor *cursor; int err; - struct cursor *cursor = alloc_cursor(itable, 1); /* +1 for new depth */ - if (!cursor) + + if (!(cursor = alloc_cursor(itable, 1))) /* +1 for new depth */ return -ENOMEM; down_write(cursor-btree-lock); if ((err = probe(cursor, tux_inode(inode)-inum))) diff -r a49705ea1c95 user/kernel/namei.c --- 
a/user/kernel/namei.c Tue Mar 03 20:43:11 2009 -0800 +++ b/user/kernel/namei.c Wed Mar 04 18:17:05 2009 -0800 @@ -60,13 +60,14 @@ static int tux_del_dirent(struct inode * static int tux3_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) { + struct sb *sb = tux_sb(dir-i_sb); struct inode *inode; int err; if (!huge_valid_dev(rdev)) return -EINVAL; - - change_begin(tux_sb(dir-i_sb)); + if ((err = reserve_begin(sb))) + return err; inode = tux_create_inode(dir, mode, rdev); err = PTR_ERR(inode); if (!IS_ERR(inode)) { @@ -81,7 +82,7 @@ static int tux3_mknod(struct inode *dir, iput(inode); } out: - change_end(tux_sb(dir-i_sb)); + change_end(sb); return err; } @@ -100,13 +101,14 @@ static int tux3_link(struct dentry *old_ static int tux3_link(struct dentry *old_dentry, struct inode *dir, struct