How about this patch? thanks, wengang.
wengang wang wrote: > Changes from V1: > 1) separate lines into sub-functions. > 2) use mlog(0, .. instead of mlog(ML_NOTICE, .. for stale error. > > #patch based on ocfs2 1.4 git. > > Signed-off-by: Wengang Wang <[email protected]> > -- > cluster/masklog.h | 2 - > dlmglue.c | 31 +++++++++------ > export.c | 92 +++++++++++++++++++++++++++++++++++++++++----- > inode.c | 107 > +++++++++++++++++++++++++++++++++++------------------- > inode.h | 1 > suballoc.c | 80 ++++++++++++++++++++++++++++++++++++++++ > suballoc.h | 9 ++++ > 7 files changed, 262 insertions(+), 60 deletions(-) > > Index: fs/ocfs2/cluster/masklog.h > =================================================================== > --- fs/ocfs2/cluster/masklog.h (revision 38) > +++ fs/ocfs2/cluster/masklog.h (working copy) > @@ -114,7 +114,7 @@ > #define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ > /* bits that are infrequently given and frequently matched in the high word > */ > #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ > -#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ > +#define ML_NOTICE 0x0000000200000000ULL /* sent to KERN_NOTICE */ > #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ > > #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) > Index: fs/ocfs2/export.c > =================================================================== > --- fs/ocfs2/export.c (revision 38) > +++ fs/ocfs2/export.c (working copy) > @@ -38,6 +38,8 @@ > #include "inode.h" > > #include "buffer_head_io.h" > +#include "sysfile.h" > +#include "suballoc.h" > > struct ocfs2_inode_handle > { > @@ -48,24 +50,92 @@ struct ocfs2_inode_handle > static struct dentry *ocfs2_get_dentry(struct super_block *sb, void *vobjp) > { > struct ocfs2_inode_handle *handle = vobjp; > - struct inode *inode; > + struct ocfs2_super *osb = OCFS2_SB(sb); > struct dentry *result; > - > + struct inode *inode, *inode_alloc_inode; > + struct buffer_head *alloc_bh = NULL; > + u64 blkno = handle->ih_blkno; > + unsigned short suballoc_bit, suballoc_slot; > + int status, set; > + > mlog_entry("(0x%p, 0x%p)\n", sb, handle); > > - if (handle->ih_blkno == 0) { > - mlog_errno(-ESTALE); > - return ERR_PTR(-ESTALE); > + if (blkno == 0) { > + mlog(0, "nfs wants inode with blkno: 0\n"); > + result = ERR_PTR(-ESTALE); > + goto bail; > + } > + > + inode = ocfs2_ilookup(sb, ino_from_blkno(sb, blkno)); > + /* found in-memory inode, goes to check generation */ > + if (inode) > + goto check_gen; > + > + status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot, > + &suballoc_bit, 1); > + if (status < 0) > + goto check_err; > + > + inode_alloc_inode = > + ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, > + suballoc_slot); > + if (!inode_alloc_inode) { > + status = -EEXIST; > + goto check_err; > } > > - inode = ocfs2_iget(OCFS2_SB(sb), handle->ih_blkno, 0, 0); > + mutex_lock(&inode_alloc_inode->i_mutex); > + status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0); > + if (status < 0) > + goto unlock_mutex; > + > + status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh, > + blkno, suballoc_bit, &set); > + if (status < 0) > + goto inode_unlock; > + > + /* allocate bit is clear, inode is a stale inode */ > + if (!set) { > + status = -ESTALE; > + goto inode_unlock; > + } > + > + inode = ocfs2_iget(OCFS2_SB(sb), blkno, 0, 0); > + > +inode_unlock: > + ocfs2_inode_unlock(inode_alloc_inode, 0); > +unlock_mutex: > + mutex_unlock(&inode_alloc_inode->i_mutex); > + iput(inode_alloc_inode); > + brelse(alloc_bh); > + > +check_err: > + > + if (status < 0) { > + if (status == -ESTALE) { > + mlog(0, "stale inode ino: %llu generation: %u\n", > + blkno, handle->ih_generation); > + } else { > + mlog_errno(status); > + } > > - if (IS_ERR(inode)) > - return (void *)inode; > + result = ERR_PTR(status); > + goto bail; > + } > > + if (IS_ERR(inode)) { > + mlog_errno((int)inode); > + result = (void *)inode; > + goto bail; > + } > + > +check_gen: > if (handle->ih_generation != inode->i_generation) { > iput(inode); > - return ERR_PTR(-ESTALE); > + mlog(0, "stale inode ino: %llu generation: %u\n", blkno, > + handle->ih_generation); > + result = ERR_PTR(-ESTALE); > + goto bail; > } > > result = d_alloc_anon(inode); > @@ -73,10 +143,12 @@ static struct dentry *ocfs2_get_dentry(s > if (!result) { > iput(inode); > mlog_errno(-ENOMEM); > - return ERR_PTR(-ENOMEM); > + result = ERR_PTR(-ENOMEM); > + goto bail; > } > result->d_op = &ocfs2_dentry_ops; > > +bail: > mlog_exit_ptr(result); > return result; > } > Index: fs/ocfs2/inode.c > =================================================================== > --- fs/ocfs2/inode.c (revision 38) > +++ fs/ocfs2/inode.c (working copy) > @@ -111,6 +111,17 @@ void ocfs2_get_inode_flags(struct ocfs2_ > oi->ip_attr |= OCFS2_DIRSYNC_FL; > } > > +struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno) > +{ > + struct ocfs2_find_inode_args args; > + > + args.fi_blkno = blkno; > + args.fi_flags = 0; > + args.fi_ino = ino_from_blkno(sb, blkno); > + args.fi_sysfile_type = 0; > + > + return ilookup5(sb, blkno, ocfs2_find_actor, &args); > +} > struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, > int sysfile_type) > { > @@ -571,41 +582,26 @@ out: > return status; > } > > -static int ocfs2_remove_inode(struct inode *inode, > +/* callers must have cluster lock inode_alloc_inode taken before calling > + * ocfs2_remove_inode > + * */ > +static int ocfs2_remove_inode(struct inode *inode_alloc_inode, > + struct buffer_head *suballoc_bh, > + struct inode *inode, > struct buffer_head *di_bh, > struct inode *orphan_dir_inode, > struct buffer_head *orphan_dir_bh) > { > int status; > - struct inode *inode_alloc_inode = NULL; > - struct buffer_head *inode_alloc_bh = NULL; > handle_t *handle; > struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); > struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; > > - inode_alloc_inode = > - ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE, > - le16_to_cpu(di->i_suballoc_slot)); > - if (!inode_alloc_inode) { > - status = -EEXIST; > - mlog_errno(status); > - goto bail; > - } > - > - mutex_lock(&inode_alloc_inode->i_mutex); > - status = ocfs2_inode_lock(inode_alloc_inode, &inode_alloc_bh, 1); > - if (status < 0) { > - mutex_unlock(&inode_alloc_inode->i_mutex); > - > - mlog_errno(status); > - goto bail; > - } > - > handle = ocfs2_start_trans(osb, OCFS2_DELETE_INODE_CREDITS); > if (IS_ERR(handle)) { > status = PTR_ERR(handle); > mlog_errno(status); > - goto bail_unlock; > + goto bail; > } > > status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode, > @@ -635,19 +631,13 @@ static int ocfs2_remove_inode(struct ino > ocfs2_remove_from_cache(inode, di_bh); > > status = ocfs2_free_dinode(handle, inode_alloc_inode, > - inode_alloc_bh, di); > + suballoc_bh, di); > if (status < 0) > mlog_errno(status); > > bail_commit: > ocfs2_commit_trans(osb, handle); > -bail_unlock: > - ocfs2_inode_unlock(inode_alloc_inode, 1); > - mutex_unlock(&inode_alloc_inode->i_mutex); > - brelse(inode_alloc_bh); > bail: > - iput(inode_alloc_inode); > - > return status; > } > > @@ -687,7 +677,9 @@ static void ocfs2_signal_wipe_completion > wake_up(&osb->osb_wipe_event); > } > > -static int ocfs2_wipe_inode(struct inode *inode, > +static int ocfs2_wipe_inode(struct inode *inode_alloc_inode, > + struct buffer_head *suballoc_bh, > + struct inode *inode, > struct buffer_head *di_bh) > { > int status, orphaned_slot; > @@ -734,8 +726,8 @@ static int ocfs2_wipe_inode(struct inode > goto bail_unlock_dir; > } > > - status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, > - orphan_dir_bh); > + status = ocfs2_remove_inode(inode_alloc_inode, suballoc_bh, inode, > + di_bh, orphan_dir_inode, orphan_dir_bh); > if (status < 0) > mlog_errno(status); > > @@ -904,7 +896,9 @@ void ocfs2_delete_inode(struct inode *in > { > int wipe, status; > sigset_t blocked, oldset; > - struct buffer_head *di_bh = NULL; > + struct buffer_head *di_bh = NULL, *suballoc_bh = NULL; > + struct inode *inode_alloc_inode = NULL; > + unsigned short suballoc_slot; > > mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); > > @@ -933,6 +927,40 @@ void ocfs2_delete_inode(struct inode *in > goto bail; > } > > + /* lock the suballoc inode first before locking against inode > + * its self > + * */ > + > + /* here we don't have cluster locked against inode, so we may > + * reads out non-up2date contents. > + * but because suballoc_slot never changes, reading out non-up2date > + * contents is no problem. > + * */ > + status = ocfs2_get_suballoc_slot_bit(OCFS2_SB(inode->i_sb), > + OCFS2_I(inode)->ip_blkno, > + &suballoc_slot, NULL, 0); > + if (status < 0) { > + mlog_errno(status); > + goto bail_sigmask; > + } > + > + inode_alloc_inode = > + ocfs2_get_system_file_inode(OCFS2_SB(inode->i_sb), > + INODE_ALLOC_SYSTEM_INODE, > + suballoc_slot); > + if (!inode_alloc_inode) { > + status = -EEXIST; > + mlog_errno(status); > + goto bail_sigmask; > + } > + > + mutex_lock(&inode_alloc_inode->i_mutex); > + status = ocfs2_inode_lock(inode_alloc_inode, &suballoc_bh, 1); > + if (status < 0) { > + mlog_errno(status); > + goto bail_mutex; > + } > + > /* Lock down the inode. This gives us an up to date view of > * it's metadata (for verification), and allows us to > * serialize delete_inode on multiple nodes. > @@ -946,7 +974,7 @@ void ocfs2_delete_inode(struct inode *in > if (status != -ENOENT) > mlog_errno(status); > ocfs2_cleanup_delete_inode(inode, 0); > - goto bail_unblock; > + goto bail_unlock_alloc_inode; > } > > /* Query the cluster. This will be the final decision made > @@ -968,7 +996,8 @@ void ocfs2_delete_inode(struct inode *in > > ocfs2_cleanup_delete_inode(inode, 0); > > - status = ocfs2_wipe_inode(inode, di_bh); > + status = ocfs2_wipe_inode(inode_alloc_inode, suballoc_bh, inode, > + di_bh); > if (status < 0) { > if (status != -EDEADLK) > mlog_errno(status); > @@ -989,7 +1018,13 @@ void ocfs2_delete_inode(struct inode *in > bail_unlock_inode: > ocfs2_inode_unlock(inode, 1); > brelse(di_bh); > -bail_unblock: > +bail_unlock_alloc_inode: > + ocfs2_inode_unlock(inode_alloc_inode, 1); > + brelse(suballoc_bh); > +bail_mutex: > + mutex_unlock(&inode_alloc_inode->i_mutex); > + iput(inode_alloc_inode); > +bail_sigmask: > status = sigprocmask(SIG_SETMASK, &oldset, NULL); > if (status < 0) > mlog_errno(status); > Index: fs/ocfs2/suballoc.c > =================================================================== > --- fs/ocfs2/suballoc.c (revision 38) > +++ fs/ocfs2/suballoc.c (working copy) > @@ -1891,3 +1891,83 @@ static inline void ocfs2_debug_suballoc_ > (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); > } > } > + > +/* reads(hit disk when dirty_read is true) the inode specified by blkno to > get > + * suballoc_slot and suballoc_bit > + * */ > +int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, > + unsigned short *suballoc_slot, > + unsigned short *suballoc_bit, > + int dirty_read) > +{ > + int status; > + struct buffer_head *inode_bh = NULL; > + struct ocfs2_dinode *inode_fe; > + int flags = 0; > + > + mlog_entry("blkno: %llu\n", blkno); > + if (!dirty_read) > + flags |= OCFS2_BH_CACHED; > + status = ocfs2_read_block(osb, blkno, &inode_bh, flags, NULL); > + if (status < 0) > + goto bail; > + > + inode_fe = (struct ocfs2_dinode *) inode_bh->b_data; > + if (!OCFS2_IS_VALID_DINODE(inode_fe)) { > + status = -EINVAL; > + goto bail; > + } > + > + if (suballoc_slot) > + *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot); > + if (suballoc_bit) > + *suballoc_bit= le16_to_cpu(inode_fe->i_suballoc_bit); > + > +bail: > + brelse(inode_bh); > + > + mlog_exit(status); > + return status; > +} > + > +/* test bit bit is SET in allocator bitmap or not. > + * on success, 0 is returned and *res is 1 for SET; 0 otherwise. > + * when fails, errno is returned and *res is meaningless. > + * calls this after you have cluster locked against suballoc, or you may > + * get a result based on non-up2date contents > + * */ > +int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, struct inode *suballoc, > + struct buffer_head *alloc_bh, u64 blkno, > + unsigned short bit, int *res) > +{ > + struct ocfs2_dinode *alloc_fe; > + struct ocfs2_group_desc *group; > + struct buffer_head *group_bh = NULL; > + u64 bg_blkno; > + int status; > + > + mlog_entry("blkno: %llu bit: %u\n", blkno, (unsigned int)bit); > + BUG_ON(!res); > + > + alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data; > + BUG_ON((bit + 1) > > + ocfs2_bits_per_group(&alloc_fe->id2.i_chain)); > + > + bg_blkno = ocfs2_which_suballoc_group(blkno, bit); > + status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, > + suballoc); > + if (status < 0) > + goto bail; > + > + group = (struct ocfs2_group_desc *) group_bh->b_data; > + status = ocfs2_check_group_descriptor(osb->sb, alloc_fe, group); > + if (status < 0) > + goto bail; > + > + *res = ocfs2_test_bit(bit, (unsigned long *)group->bg_bitmap); > +bail: > + brelse(group_bh); > + > + mlog_exit(status); > + return status; > +} > Index: fs/ocfs2/suballoc.h > =================================================================== > --- fs/ocfs2/suballoc.h (revision 38) > +++ fs/ocfs2/suballoc.h (working copy) > @@ -156,4 +156,13 @@ u64 ocfs2_which_cluster_group(struct ino > int ocfs2_check_group_descriptor(struct super_block *sb, > struct ocfs2_dinode *di, > struct ocfs2_group_desc *gd); > + > +int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno, > + unsigned short *suballoc_slot, > + unsigned short *suballoc_bit, > + int dirty_read); > + > +int ocfs2_test_suballoc_bit(struct ocfs2_super *osb, struct inode *suballoc, > + struct buffer_head *alloc_bh, u64 blkno, > + unsigned short bit, int *res); > #endif /* _CHAINALLOC_H_ */ > Index: fs/ocfs2/dlmglue.c > =================================================================== > --- fs/ocfs2/dlmglue.c (revision 38) > +++ fs/ocfs2/dlmglue.c (working copy) > @@ -1940,19 +1940,24 @@ static int ocfs2_inode_lock_update(struc > status = -EIO; > goto bail_refresh; > } > - mlog_bug_on_msg(inode->i_generation != > - le32_to_cpu(fe->i_generation), > - "Invalid dinode %llu disk generation: %u " > - "inode->i_generation: %u\n", > - (unsigned long long)oi->ip_blkno, > - le32_to_cpu(fe->i_generation), > - inode->i_generation); > - mlog_bug_on_msg(le64_to_cpu(fe->i_dtime) || > - !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL)), > - "Stale dinode %llu dtime: %llu flags: 0x%x\n", > - (unsigned long long)oi->ip_blkno, > - (unsigned long long)le64_to_cpu(fe->i_dtime), > - le32_to_cpu(fe->i_flags)); > + if (inode->i_generation != le32_to_cpu(fe->i_generation)) { > + mlog(0, "Stale inode %llu disk generation: %u " > + "inode generation: %u\n", > + (unsigned long long)oi->ip_blkno, > + le32_to_cpu(fe->i_generation), > + inode->i_generation); > + status = -ESTALE; > + goto bail_refresh; > + } > + if (le64_to_cpu(fe->i_dtime) || > + !(fe->i_flags & cpu_to_le32(OCFS2_VALID_FL))) { > + mlog(0, "Stale inode %llu dtime: %llu flags: 0x%x\n", > + (unsigned long long)oi->ip_blkno, > + (unsigned long long)le64_to_cpu(fe->i_dtime), > + le32_to_cpu(fe->i_flags)); > + status = -ESTALE; > + goto bail_refresh; > + } > > ocfs2_refresh_inode(inode, fe); > ocfs2_track_lock_refresh(lockres); > Index: fs/ocfs2/inode.h > =================================================================== > --- fs/ocfs2/inode.h (revision 38) > +++ fs/ocfs2/inode.h (working copy) > @@ -126,6 +126,7 @@ void ocfs2_drop_inode(struct inode *inod > /* Flags for ocfs2_iget() */ > #define OCFS2_FI_FLAG_SYSFILE 0x1 > #define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x2 > +struct inode *ocfs2_ilookup(struct super_block *sb, u64 feoff); > struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags, > int sysfile_type); > int ocfs2_inode_init_private(struct inode *inode); > > _______________________________________________ > Ocfs2-devel mailing list > [email protected] > http://oss.oracle.com/mailman/listinfo/ocfs2-devel > _______________________________________________ Ocfs2-devel mailing list [email protected] http://oss.oracle.com/mailman/listinfo/ocfs2-devel
