Hi, On Wed, Jul 12, 2000 at 07:12:39PM +0200, Juan J. Quintela wrote: > > Hi > I have ported Stephen patch from 2.2.8 to 2.4, but now I have > contention in the inode bitmaps. Can I use the same trick > there that he used in the block bitmap??? I've attached the patch I did for 2.3.99-pre7. It uses a much simpler trick for inodes: it just drops the lock while waiting in the read_inode_bitmap functions and retakes it after IO. That means that the lock is still held for the duration of the allocation of the inode, but that's a much rarer and also a much more lightweight operation than block allocation, so it should work fine. Cheers, Stephen
--- linux-2.3.99/fs/ext2/balloc.c.~1~ Wed Mar 29 22:35:22 2000 +++ linux-2.3.99/fs/ext2/balloc.c Wed May 3 16:49:04 2000 @@ -9,6 +9,13 @@ * Enhanced block allocation by Stephen Tweedie ([EMAIL PROTECTED]), 1993 * Big-endian to little-endian byte-swapping/bitmaps by * David S. Miller ([EMAIL PROTECTED]), 1995 + * + * Dropped use of the superblock lock. + * This means that we have to take extra care not to block between any + * operations on the bitmap and the corresponding update to the group + * descriptors. + * Stephen C. Tweedie ([EMAIL PROTECTED]), 1999 + * */ #include <linux/config.h> @@ -16,7 +23,6 @@ #include <linux/locks.h> #include <linux/quotaops.h> - /* * balloc.c contains the blocks allocation and deallocation routines */ @@ -70,42 +76,33 @@ } /* - * Read the bitmap for a given block_group, reading into the specified - * slot in the superblock's bitmap cache. + * Read the bitmap for a given block_group. * - * Return >=0 on success or a -ve error code. + * Return the buffer_head on success or NULL on IO error. */ -static int read_block_bitmap (struct super_block * sb, - unsigned int block_group, - unsigned long bitmap_nr) +static struct buffer_head * read_block_bitmap (struct super_block * sb, + unsigned int block_group) { struct ext2_group_desc * gdp; struct buffer_head * bh = NULL; - int retval = -EIO; gdp = ext2_get_group_desc (sb, block_group, NULL); if (!gdp) - goto error_out; - retval = 0; + return NULL; + bh = bread (sb->s_dev, le32_to_cpu(gdp->bg_block_bitmap), sb->s_blocksize); if (!bh) { ext2_error (sb, "read_block_bitmap", "Cannot read block bitmap - " "block_group = %d, block_bitmap = %lu", block_group, (unsigned long) gdp->bg_block_bitmap); - retval = -EIO; + return NULL; } - /* - * On IO error, just leave a zero in the superblock's block pointer for - * this group. The IO will be retried next time. - */ -error_out: - sb->u.ext2_sb.s_block_bitmap_number[bitmap_nr] = block_group; - sb->u.ext2_sb.s_block_bitmap[bitmap_nr] = bh; - return retval; + return bh; } + /* * load_block_bitmap loads the block bitmap for a blocks group * @@ -122,9 +119,9 @@ static int __load_block_bitmap (struct super_block * sb, unsigned int block_group) { - int i, j, retval = 0; + int i, j; unsigned long block_bitmap_number; - struct buffer_head * block_bitmap; + struct buffer_head * block_bitmap, *cached_bitmap = NULL; if (block_group >= sb->u.ext2_sb.s_groups_count) ext2_panic (sb, "load_block_bitmap", @@ -132,27 +129,34 @@ "block_group = %d, groups_count = %lu", block_group, sb->u.ext2_sb.s_groups_count); + retry: if (sb->u.ext2_sb.s_groups_count <= EXT2_MAX_GROUP_LOADED) { if (sb->u.ext2_sb.s_block_bitmap[block_group]) { if (sb->u.ext2_sb.s_block_bitmap_number[block_group] == - block_group) + block_group) { + brelse(cached_bitmap); return block_group; + } ext2_error (sb, "__load_block_bitmap", "block_group != block_bitmap_number"); } - retval = read_block_bitmap (sb, block_group, block_group); - if (retval < 0) - return retval; + if (!cached_bitmap) + goto load; + sb->u.ext2_sb.s_block_bitmap_number[block_group] = block_group; + sb->u.ext2_sb.s_block_bitmap[block_group] = cached_bitmap; return block_group; } for (i = 0; i < sb->u.ext2_sb.s_loaded_block_bitmaps && sb->u.ext2_sb.s_block_bitmap_number[i] != block_group; i++) ; + + /* Already cached? */ if (i < sb->u.ext2_sb.s_loaded_block_bitmaps && sb->u.ext2_sb.s_block_bitmap_number[i] == block_group) { block_bitmap_number = sb->u.ext2_sb.s_block_bitmap_number[i]; block_bitmap = sb->u.ext2_sb.s_block_bitmap[i]; + for (j = i; j > 0; j--) { sb->u.ext2_sb.s_block_bitmap_number[j] = sb->u.ext2_sb.s_block_bitmap_number[j - 1]; @@ -161,28 +165,36 @@ } sb->u.ext2_sb.s_block_bitmap_number[0] = block_bitmap_number; sb->u.ext2_sb.s_block_bitmap[0] = block_bitmap; + touch_buffer(block_bitmap); + brelse(cached_bitmap); + return 0; + } - /* - * There's still one special case here --- if block_bitmap == 0 - * then our last attempt to read the bitmap failed and we have - * just ended up caching that failure. Try again to read it. - */ - if (!block_bitmap) - retval = read_block_bitmap (sb, block_group, 0); - } else { - if (sb->u.ext2_sb.s_loaded_block_bitmaps < EXT2_MAX_GROUP_LOADED) - sb->u.ext2_sb.s_loaded_block_bitmaps++; - else - brelse (sb->u.ext2_sb.s_block_bitmap[EXT2_MAX_GROUP_LOADED - 1]); - for (j = sb->u.ext2_sb.s_loaded_block_bitmaps - 1; j > 0; j--) { - sb->u.ext2_sb.s_block_bitmap_number[j] = - sb->u.ext2_sb.s_block_bitmap_number[j - 1]; - sb->u.ext2_sb.s_block_bitmap[j] = - sb->u.ext2_sb.s_block_bitmap[j - 1]; - } - retval = read_block_bitmap (sb, block_group, 0); + /* Have we loaded the right bitmap yet? */ + if (!cached_bitmap) + goto load; + + /* OK, shift the bitmaps round and install this new one in the list. */ + if (sb->u.ext2_sb.s_loaded_block_bitmaps < EXT2_MAX_GROUP_LOADED) + sb->u.ext2_sb.s_loaded_block_bitmaps++; + else + brelse (sb->u.ext2_sb.s_block_bitmap[EXT2_MAX_GROUP_LOADED - 1]); + for (j = sb->u.ext2_sb.s_loaded_block_bitmaps - 1; j > 0; j--) { + sb->u.ext2_sb.s_block_bitmap_number[j] = + sb->u.ext2_sb.s_block_bitmap_number[j - 1]; + sb->u.ext2_sb.s_block_bitmap[j] = + sb->u.ext2_sb.s_block_bitmap[j - 1]; } - return retval; + + sb->u.ext2_sb.s_block_bitmap_number[0] = block_group; + sb->u.ext2_sb.s_block_bitmap[0] = cached_bitmap; + return 0; + + load: + cached_bitmap = read_block_bitmap(sb, block_group); + if (!cached_bitmap) + return -EIO; + goto retry; } /* @@ -198,19 +210,21 @@ * differentiating between a group for which we have never performed a bitmap * IO request, and a group for which the last bitmap read request failed. */ -static inline int load_block_bitmap (struct super_block * sb, - unsigned int block_group) +static struct buffer_head * load_block_bitmap (struct super_block * sb, + unsigned int block_group) { int slot; - + struct buffer_head *bh; + /* * Do the lookup for the slot. First of all, check if we're asking * for the same slot as last time, and did we succeed that last time? */ if (sb->u.ext2_sb.s_loaded_block_bitmaps > 0 && sb->u.ext2_sb.s_block_bitmap_number[0] == block_group && - sb->u.ext2_sb.s_block_bitmap[block_group]) { - return 0; + (bh = sb->u.ext2_sb.s_block_bitmap[0])) { + atomic_inc(&bh->b_count); + return bh; } /* * Or can we do a fast lookup based on a loaded group on a filesystem @@ -232,30 +246,31 @@ * <0 means we just got an error */ if (slot < 0) - return slot; + return NULL; /* * If it's a valid slot, we may still have cached a previous IO error, * in which case the bh in the superblock cache will be zero. */ - if (!sb->u.ext2_sb.s_block_bitmap[slot]) - return -EIO; + bh = sb->u.ext2_sb.s_block_bitmap[slot]; + if (!bh) + return NULL; /* * Must have been read in OK to get this far. */ - return slot; + atomic_inc(&bh->b_count); + return bh; } void ext2_free_blocks (const struct inode * inode, unsigned long block, unsigned long count) { - struct buffer_head * bh; + struct buffer_head * bh = NULL; struct buffer_head * bh2; unsigned long block_group; unsigned long bit; unsigned long i; - int bitmap_nr; unsigned long overflow; struct super_block * sb; struct ext2_group_desc * gdp; @@ -266,7 +281,6 @@ printk ("ext2_free_blocks: nonexistent device"); return; } - lock_super (sb); es = sb->u.ext2_sb.s_es; if (block < le32_to_cpu(es->s_first_data_block) || (block + count) > le32_to_cpu(es->s_blocks_count)) { @@ -292,11 +306,11 @@ overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb); count -= overflow; } - bitmap_nr = load_block_bitmap (sb, block_group); - if (bitmap_nr < 0) + brelse (bh); + bh = load_block_bitmap (sb, block_group); + if (bh == NULL) goto error_return; - bh = sb->u.ext2_sb.s_block_bitmap[bitmap_nr]; gdp = ext2_get_group_desc (sb, block_group, &bh2); if (!gdp) goto error_return; @@ -318,11 +332,11 @@ "bit already cleared for block %lu", block); else { - DQUOT_FREE_BLOCK(sb, inode, 1); gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)+1); es->s_free_blocks_count = cpu_to_le32(le32_to_cpu(es->s_free_blocks_count)+1); + DQUOT_FREE_BLOCK(sb, inode, 1); } } @@ -341,7 +355,7 @@ } sb->s_dirt = 1; error_return: - unlock_super (sb); + brelse (bh); return; } @@ -355,11 +369,10 @@ int ext2_new_block (const struct inode * inode, unsigned long goal, u32 * prealloc_count, u32 * prealloc_block, int * err) { - struct buffer_head * bh; + struct buffer_head * bh = NULL; struct buffer_head * bh2; char * p, * r; int i, j, k, tmp; - int bitmap_nr; struct super_block * sb; struct ext2_group_desc * gdp; struct ext2_super_block * es; @@ -373,14 +386,12 @@ return 0; } - lock_super (sb); es = sb->u.ext2_sb.s_es; if (le32_to_cpu(es->s_free_blocks_count) <= le32_to_cpu(es->s_r_blocks_count) && ((sb->u.ext2_sb.s_resuid != current->fsuid) && (sb->u.ext2_sb.s_resgid == 0 || !in_group_p (sb->u.ext2_sb.s_resgid)) && !capable(CAP_SYS_RESOURCE))) { - unlock_super (sb); return 0; } @@ -404,12 +415,11 @@ if (j) goal_attempts++; #endif - bitmap_nr = load_block_bitmap (sb, i); - if (bitmap_nr < 0) + brelse (bh); + bh = load_block_bitmap (sb, i); + if (bh == NULL) goto io_error; - bh = sb->u.ext2_sb.s_block_bitmap[bitmap_nr]; - ext2_debug ("goal is at %d:%d.\n", i, j); if (!ext2_test_bit(j, bh->b_data)) { @@ -463,6 +473,8 @@ } ext2_debug ("Bit not found in block group %d.\n", i); + brelse (bh); + bh = NULL; /* * Now search the rest of the groups. We assume that @@ -473,23 +485,18 @@ if (i >= sb->u.ext2_sb.s_groups_count) i = 0; gdp = ext2_get_group_desc (sb, i, &bh2); - if (!gdp) { - *err = -EIO; - unlock_super (sb); - return 0; - } + if (!gdp) + goto io_error; if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) break; } if (k >= sb->u.ext2_sb.s_groups_count) { - unlock_super (sb); return 0; } - bitmap_nr = load_block_bitmap (sb, i); - if (bitmap_nr < 0) + bh = load_block_bitmap (sb, i); + if (bh == NULL) goto io_error; - bh = sb->u.ext2_sb.s_block_bitmap[bitmap_nr]; r = memscan(bh->b_data, 0, EXT2_BLOCKS_PER_GROUP(sb) >> 3); j = (r - bh->b_data) << 3; if (j < EXT2_BLOCKS_PER_GROUP(sb)) @@ -500,7 +507,7 @@ if (j >= EXT2_BLOCKS_PER_GROUP(sb)) { ext2_error (sb, "ext2_new_block", "Free blocks count corrupted for block group %d", i); - unlock_super (sb); + brelse (bh); return 0; } @@ -520,7 +527,7 @@ * Check quota for allocation of this block. */ if(DQUOT_ALLOC_BLOCK(sb, inode, 1)) { - unlock_super(sb); + brelse (bh); *err = -EDQUOT; return 0; } @@ -535,14 +542,20 @@ "Allocating block in system zone - " "block = %u", tmp); + /* + * The quota check may have blocked. That should be rare, but + * if it does happen, there's a chance another process will + * have allocated the current block in the mean time. We need + * to check and retry if the block is no longer free. + */ if (ext2_set_bit (j, bh->b_data)) { - ext2_warning (sb, "ext2_new_block", - "bit already set for block %d", j); DQUOT_FREE_BLOCK(sb, inode, 1); goto repeat; } ext2_debug ("found bit %d\n", j); + gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +- 1); + es->s_free_blocks_count = cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - +1); /* * Do block preallocation now if required. @@ -556,25 +569,26 @@ *prealloc_count = 0; *prealloc_block = tmp + 1; - for (k = 1; - k < prealloc_goal && (j + k) < EXT2_BLOCKS_PER_GROUP(sb); - k++) { - if (DQUOT_PREALLOC_BLOCK(sb, inode, 1)) - break; - if (ext2_set_bit (j + k, bh->b_data)) { - DQUOT_FREE_BLOCK(sb, inode, 1); - break; - } - (*prealloc_count)++; - } - gdp->bg_free_blocks_count = + if (!DQUOT_PREALLOC_BLOCK(sb, inode, 1)) { + for (k = 1; + k < prealloc_goal && + (j + k) < EXT2_BLOCKS_PER_GROUP(sb); + k++) { + if (ext2_set_bit (j + k, bh->b_data)) + break; + (*prealloc_count)++; + } + gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - *prealloc_count); - es->s_free_blocks_count = - cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - - *prealloc_count); - ext2_debug ("Preallocated a further %lu bits.\n", - *prealloc_count); + es->s_free_blocks_count = + cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - + *prealloc_count); + if (*prealloc_count < prealloc_goal) + DQUOT_FREE_BLOCK(sb, inode, prealloc_goal - +*prealloc_count); + ext2_debug ("Preallocated a further %lu bits.\n", + *prealloc_count); + } } #endif @@ -586,30 +600,29 @@ wait_on_buffer (bh); } + /* We are finished with the bitmap buffer now */ + brelse (bh); + if (j >= le32_to_cpu(es->s_blocks_count)) { ext2_error (sb, "ext2_new_block", "block(%d) >= blocks count(%d) - " "block_group = %d, es == %p ",j, le32_to_cpu(es->s_blocks_count), i, es); - unlock_super (sb); return 0; } ext2_debug ("allocating block %d. " "Goal hits %d of %d.\n", j, goal_hits, goal_attempts); - gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1); mark_buffer_dirty(bh2, 1); - es->s_free_blocks_count = cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - 1); mark_buffer_dirty(sb->u.ext2_sb.s_sbh, 1); sb->s_dirt = 1; - unlock_super (sb); *err = 0; return j; io_error: *err = -EIO; - unlock_super (sb); + brelse (bh); return 0; } @@ -619,11 +632,9 @@ #ifdef EXT2FS_DEBUG struct ext2_super_block * es; unsigned long desc_count, bitmap_count, x; - int bitmap_nr; struct ext2_group_desc * gdp; int i; - lock_super (sb); es = sb->u.ext2_sb.s_es; desc_count = 0; bitmap_count = 0; @@ -642,10 +653,10 @@ printk ("group %d: stored = %d, counted = %lu\n", i, le16_to_cpu(gdp->bg_free_blocks_count), x); bitmap_count += x; + brelse (bh); } printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n", le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count); - unlock_super (sb); return bitmap_count; #else return le32_to_cpu(sb->u.ext2_sb.s_es->s_free_blocks_count); @@ -702,11 +713,9 @@ if (!gdp) continue; desc_count += le16_to_cpu(gdp->bg_free_blocks_count); - bitmap_nr = load_block_bitmap (sb, i); - if (bitmap_nr < 0) + bh = load_block_bitmap (sb, i); + if (bh == NULL) continue; - - bh = sb->u.ext2_sb.s_block_bitmap[bitmap_nr]; if (!(sb->u.ext2_sb.s_feature_ro_compat & EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER) || --- linux-2.3.99/fs/ext2/ialloc.c.~1~ Wed Mar 29 22:35:22 2000 +++ linux-2.3.99/fs/ext2/ialloc.c Mon May 1 20:46:58 2000 @@ -35,42 +35,35 @@ /* - * Read the inode allocation bitmap for a given block_group, reading - * into the specified slot in the superblock's bitmap cache. + * Read the bitmap for a given block_group. * - * Return >=0 on success or a -ve error code. + * Return the buffer_head on success or NULL on IO error. */ -static int read_inode_bitmap (struct super_block * sb, - unsigned long block_group, - unsigned int bitmap_nr) + +static struct buffer_head * read_inode_bitmap (struct super_block * sb, + unsigned long block_group) { struct ext2_group_desc * gdp; struct buffer_head * bh = NULL; - int retval = 0; gdp = ext2_get_group_desc (sb, block_group, NULL); - if (!gdp) { - retval = -EIO; - goto error_out; - } + if (!gdp) + return NULL; + + unlock_super(sb); bh = bread (sb->s_dev, le32_to_cpu(gdp->bg_inode_bitmap), sb->s_blocksize); + lock_super(sb); if (!bh) { ext2_error (sb, "read_inode_bitmap", "Cannot read inode bitmap - " "block_group = %lu, inode_bitmap = %lu", block_group, (unsigned long) gdp->bg_inode_bitmap); - retval = -EIO; + return NULL; } - /* - * On IO error, just leave a zero in the superblock's block pointer for - * this group. The IO will be retried next time. - */ -error_out: - sb->u.ext2_sb.s_inode_bitmap_number[bitmap_nr] = block_group; - sb->u.ext2_sb.s_inode_bitmap[bitmap_nr] = bh; - return retval; + return bh; } + /* * load_inode_bitmap loads the inode bitmap for a blocks group * @@ -87,9 +80,9 @@ static int load_inode_bitmap (struct super_block * sb, unsigned int block_group) { - int i, j, retval = 0; + int i, j; unsigned long inode_bitmap_number; - struct buffer_head * inode_bitmap; + struct buffer_head * inode_bitmap, * cached_bitmap = NULL; if (block_group >= sb->u.ext2_sb.s_groups_count) ext2_panic (sb, "load_inode_bitmap", @@ -100,18 +93,23 @@ sb->u.ext2_sb.s_inode_bitmap_number[0] == block_group && sb->u.ext2_sb.s_inode_bitmap[0] != NULL) return 0; + + retry: + if (sb->u.ext2_sb.s_groups_count <= EXT2_MAX_GROUP_LOADED) { if (sb->u.ext2_sb.s_inode_bitmap[block_group]) { if (sb->u.ext2_sb.s_inode_bitmap_number[block_group] != block_group) ext2_panic (sb, "load_inode_bitmap", "block_group != inode_bitmap_number"); - else + else { + brelse(cached_bitmap); return block_group; + } } else { - retval = read_inode_bitmap (sb, block_group, - block_group); - if (retval < 0) - return retval; + if (!cached_bitmap) + goto load; + sb->u.ext2_sb.s_inode_bitmap_number[block_group] = block_group; + sb->u.ext2_sb.s_inode_bitmap[block_group] = cached_bitmap; return block_group; } } @@ -120,10 +118,13 @@ sb->u.ext2_sb.s_inode_bitmap_number[i] != block_group; i++) ; + + /* Already cached? */ if (i < sb->u.ext2_sb.s_loaded_inode_bitmaps && sb->u.ext2_sb.s_inode_bitmap_number[i] == block_group) { inode_bitmap_number = sb->u.ext2_sb.s_inode_bitmap_number[i]; inode_bitmap = sb->u.ext2_sb.s_inode_bitmap[i]; + for (j = i; j > 0; j--) { sb->u.ext2_sb.s_inode_bitmap_number[j] = sb->u.ext2_sb.s_inode_bitmap_number[j - 1]; @@ -132,29 +133,35 @@ } sb->u.ext2_sb.s_inode_bitmap_number[0] = inode_bitmap_number; sb->u.ext2_sb.s_inode_bitmap[0] = inode_bitmap; + touch_buffer(inode_bitmap); + brelse(cached_bitmap); + return 0; + } - /* - * There's still one special case here --- if inode_bitmap == 0 - * then our last attempt to read the bitmap failed and we have - * just ended up caching that failure. Try again to read it. - */ - if (!inode_bitmap) - retval = read_inode_bitmap (sb, block_group, 0); - - } else { - if (sb->u.ext2_sb.s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED) - sb->u.ext2_sb.s_loaded_inode_bitmaps++; - else - brelse (sb->u.ext2_sb.s_inode_bitmap[EXT2_MAX_GROUP_LOADED - 1]); - for (j = sb->u.ext2_sb.s_loaded_inode_bitmaps - 1; j > 0; j--) { - sb->u.ext2_sb.s_inode_bitmap_number[j] = - sb->u.ext2_sb.s_inode_bitmap_number[j - 1]; - sb->u.ext2_sb.s_inode_bitmap[j] = - sb->u.ext2_sb.s_inode_bitmap[j - 1]; - } - retval = read_inode_bitmap (sb, block_group, 0); + /* Have we loaded the right bitmap yet? */ + if (!cached_bitmap) + goto load; + + if (sb->u.ext2_sb.s_loaded_inode_bitmaps < EXT2_MAX_GROUP_LOADED) + sb->u.ext2_sb.s_loaded_inode_bitmaps++; + else + brelse (sb->u.ext2_sb.s_inode_bitmap[EXT2_MAX_GROUP_LOADED - 1]); + for (j = sb->u.ext2_sb.s_loaded_inode_bitmaps - 1; j > 0; j--) { + sb->u.ext2_sb.s_inode_bitmap_number[j] = + sb->u.ext2_sb.s_inode_bitmap_number[j - 1]; + sb->u.ext2_sb.s_inode_bitmap[j] = + sb->u.ext2_sb.s_inode_bitmap[j - 1]; } - return retval; + + sb->u.ext2_sb.s_inode_bitmap_number[0] = block_group; + sb->u.ext2_sb.s_inode_bitmap[0] = cached_bitmap; + return 0; + + load: + cached_bitmap = read_inode_bitmap(sb, block_group); + if (!cached_bitmap) + return -EIO; + goto retry; } /*