This diff against my previous patch splits ext2_new_inode into front end 
and back end parts in preparation for deferring the back end part.  The 
front end just initializes the in-memory inode as with ramfs and leaves 
the inode number unassigned.  The back end goes fishing around in inode 
allocation maps to find a suitable free inode and assigns the inode 
number to the in-memory inode.  Right now, the front end part just 
calls the back end, so not much has changed yet.

In the next iteration, the back end part will be deferred so that when a 
file is created and before a sync is done, the file has no inode 
number.  This raises a couple of issues:

  * What about sys_fstat, which exposes the inode number to user
    applications?

  * What about NFS, which needs inode numbers to generate stable
    handles?

The inode number is not actually used in that many places in Ext2, which 
is good.  The most important user is ext2_iget which looks up an inode 
in the vfs inode cache given an inode number.  This is only used in two 
places: ext2_lookup and ext2_get_parent.  The latter is for NFS, which 
we will worry about later.  The former does a "real lookup" in the 
filesystem for any name the vfs fails to find in the dentry cache.  But 
we pin the new dentry in cache just to ensure that a real lookup is 
never performed for a new inode before we complete the deferred back 
end update of filesystem blocks.

To give NFS the real inode numbers it needs, we would introduce a "wait 
on ino assignment" operation; however, that is outside the scope of this 
Ext2 patch.  Tux3 will have this, but all we want to demonstrate with 
Ext2 is that namespace consistency can be maintained while updates to 
directory and inode table blocks are deferred.

Sys_fstat will use the wait-on-ino-assigned strategy.  This will most 
likely be implemented as a wait-on-bit operation, and we introduce a 
new inode flag to indicate an inode number has been assigned.  (The 
kernel wait-on-bit facility uses hashed locks that do not require 
adding new lock or wait fields to objects, so the space cost is just 
one new flags bit.)

Quota subsystem initialization and security hooks add further 
complexity to the new_inode regimen for Ext2.  I am not sure whether to 
do those things in the front end or back end.  Probably in the front 
end, but as I have not looked closely at this, I left them in the back 
end for now.

Regards,

Daniel
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 47d88da..16c66a5 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -117,7 +117,6 @@ extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page
 extern int ext2_sync_file (struct file *, struct dentry *, int);
 
 /* ialloc.c */
-extern struct inode * ext2_new_inode (struct inode *, int);
 extern void ext2_free_inode (struct inode *);
 extern unsigned long ext2_count_free_inodes (struct super_block *);
 extern void ext2_check_inodes_bitmap (struct super_block *);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index f597413..c0ce505 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -435,25 +435,19 @@ found:
 	return group;
 }
 
-struct inode *ext2_new_inode(struct inode *dir, int mode)
+int ext2_assign_ino(struct inode *dir, struct inode *inode)
 {
-	struct super_block *sb;
+	struct super_block *sb = dir->i_sb;
 	struct buffer_head *bitmap_bh = NULL;
 	struct buffer_head *bh2;
-	int group, i;
+	int group, i, mode = inode->i_mode;
 	ino_t ino = 0;
-	struct inode * inode;
 	struct ext2_group_desc *gdp;
 	struct ext2_super_block *es;
 	struct ext2_inode_info *ei;
 	struct ext2_sb_info *sbi;
 	int err;
 
-	sb = dir->i_sb;
-	inode = new_inode(sb);
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
-
 	ei = EXT2_I(inode);
 	sbi = EXT2_SB(sb);
 	es = sbi->s_es;
@@ -550,41 +544,8 @@ got:
 
 	sb->s_dirt = 1;
 	mark_buffer_dirty(bh2);
-	inode->i_uid = current->fsuid;
-	if (test_opt (sb, GRPID))
-		inode->i_gid = dir->i_gid;
-	else if (dir->i_mode & S_ISGID) {
-		inode->i_gid = dir->i_gid;
-		if (S_ISDIR(mode))
-			mode |= S_ISGID;
-	} else
-		inode->i_gid = current->fsgid;
-	inode->i_mode = mode;
-
-	inode->i_ino = ino;
-	inode->i_blocks = 0;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
-	memset(ei->i_data, 0, sizeof(ei->i_data));
-	ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL;
-	if (S_ISLNK(mode))
-		ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);
-	/* dirsync is only applied to directories */
-	if (!S_ISDIR(mode))
-		ei->i_flags &= ~EXT2_DIRSYNC_FL;
-	ei->i_faddr = 0;
-	ei->i_frag_no = 0;
-	ei->i_frag_size = 0;
-	ei->i_file_acl = 0;
-	ei->i_dir_acl = 0;
-	ei->i_dtime = 0;
-	ei->i_block_alloc_info = NULL;
 	ei->i_block_group = group;
-	ei->i_dir_start_lookup = 0;
-	ei->i_state = EXT2_STATE_NEW;
-	ext2_set_inode_flags(inode);
-	spin_lock(&sbi->s_next_gen_lock);
-	inode->i_generation = sbi->s_next_generation++;
-	spin_unlock(&sbi->s_next_gen_lock);
+	inode->i_ino = ino;
 	insert_inode_hash(inode);
 
 	if (DQUOT_ALLOC_INODE(inode)) {
@@ -603,7 +564,7 @@ got:
 	mark_inode_dirty(inode);
 	ext2_debug("allocating inode %lu\n", inode->i_ino);
 	ext2_preread_inode(inode);
-	return inode;
+	return 0;
 
 fail_free_drop:
 	DQUOT_FREE_INODE(inode);
@@ -612,13 +573,11 @@ fail_drop:
 	DQUOT_DROP(inode);
 	inode->i_flags |= S_NOQUOTA;
 	inode->i_nlink = 0;
-	iput(inode);
-	return ERR_PTR(err);
+	return err;
 
 fail:
 	make_bad_inode(inode);
-	iput(inode);
-	return ERR_PTR(err);
+	return err;
 }
 
 unsigned long ext2_count_free_inodes (struct super_block * sb)
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 0ed08fc..c177fc8 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -69,6 +69,60 @@ static struct dentry_operations ext2_dentry_operations = {
 	.d_hide = ext2_hide_dentry,
 };
 
+int ext2_assign_ino(struct inode *dir, struct inode *inode);
+
+/* Front end: set up the in-memory inode, then call ext2_assign_ino() on it. */
+static struct inode *ext2_new_inode(struct inode *dir, int mode) {
+	struct super_block *sb = dir->i_sb;
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
+	struct inode *inode = new_inode(sb);
+	struct ext2_inode_info *ei;
+	int err;
+
+	if (!inode)
+		return ERR_PTR(-ENOMEM);
+
+	ei = EXT2_I(inode);
+	inode->i_uid = current->fsuid;
+	if (test_opt(sb, GRPID))
+		inode->i_gid = dir->i_gid;
+	else if (dir->i_mode & S_ISGID) {
+		inode->i_gid = dir->i_gid;
+		if (S_ISDIR(mode))
+			mode |= S_ISGID;
+	} else
+		inode->i_gid = current->fsgid;
+	/* Store the mode only after the setgid branch above may have changed it */
+	inode->i_mode = mode;
+	inode->i_blocks = 0;
+	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
+	memset(ei->i_data, 0, sizeof(ei->i_data));
+	ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL;
+	if (S_ISLNK(mode))
+		ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);
+	/* dirsync is only applied to directories */
+	if (!S_ISDIR(mode))
+		ei->i_flags &= ~EXT2_DIRSYNC_FL;
+	ei->i_faddr = 0;
+	ei->i_frag_no = 0;
+	ei->i_frag_size = 0;
+	ei->i_file_acl = 0;
+	ei->i_dir_acl = 0;
+	ei->i_dtime = 0;
+	ei->i_block_alloc_info = NULL;
+	ei->i_dir_start_lookup = 0;
+	ei->i_state = EXT2_STATE_NEW;
+	ext2_set_inode_flags(inode);
+	spin_lock(&sbi->s_next_gen_lock);
+	inode->i_generation = sbi->s_next_generation++;
+	spin_unlock(&sbi->s_next_gen_lock);
+	if ((err = ext2_assign_ino(dir, inode))) {
+		iput(inode);
+		return ERR_PTR(err);
+	}
+	return inode;
+}
+
 static int ext2_unlink(struct inode *dir, struct dentry *dentry)
 {
 	show_dentry("defer unlink", dentry);
_______________________________________________
Tux3 mailing list
[email protected]
http://mailman.tux3.org/cgi-bin/mailman/listinfo/tux3

Reply via email to