RE: [PATCH v2 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate

2014-05-12 Thread Namjae Jeon
> 
> On Mon, May 12, 2014 at 06:42:37PM +0900, Namjae Jeon wrote:
> >
> > > > +xfs_bmap_split_extent(
> > > > +   struct xfs_inode*ip,
> > > > +   xfs_fileoff_t   split_fsb,
> > > > +   xfs_extnum_t*split_ext)
> > > > +{
> > > > +   struct xfs_mount*mp = ip->i_mount;
> > > > +   struct xfs_trans*tp;
> > > > +   struct xfs_bmap_freefree_list;
> > > > +   xfs_fsblock_t   firstfsb;
> > > > +   int committed;
> > > > +   int error;
> > > > +
> > > > +   tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
> > > > +   error = xfs_trans_reserve(tp, _RES(mp)->tr_write,
> > > > +   XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
> > > > +
> > > > +   if (error) {
> > > > +   /*
> > > > +* Free the transaction structure.
> > > > +*/
> > > > +   ASSERT(XFS_FORCED_SHUTDOWN(mp));
> > >
> > Hi, Brian.
> > > As in the other patch, we're attempting to reserve fs blocks for the
> > > transaction, so ENOSPC is a possibility that I think the assert should
> > > accommodate.
> > How about removing the ASSERT completely as suggessted by Dave
> > in other thread?
> >
> 
> Yeah, that works too. If Dave prefers to just remove these asserts
> that's fine with me. I just wanted to make sure we aren't adding
> spurious asserts for valid failures.
Okay.
> 
> > >
> ...
> > > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > > > index 97855c5..392b029 100644
> > > > --- a/fs/xfs/xfs_file.c
> > > > +++ b/fs/xfs/xfs_file.c
> > > > @@ -760,7 +760,8 @@ xfs_file_fallocate(
> > > > if (!S_ISREG(inode->i_mode))
> > > > return -EINVAL;
> > > > if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
> > > > -FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
> > > > +FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
> > > > +FALLOC_FL_INSERT_RANGE))
> > > > return -EOPNOTSUPP;
> > > >
> > > > xfs_ilock(ip, XFS_IOLOCK_EXCL);
> > > > @@ -790,6 +791,40 @@ xfs_file_fallocate(
> > > > error = xfs_collapse_file_space(ip, offset, len);
> > > > if (error)
> > > > goto out_unlock;
> > > > +   } else if (mode & FALLOC_FL_INSERT_RANGE) {
> > > > +   unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
> > > > +   struct iattr iattr;
> > > > +
> > > > +   if (offset & blksize_mask || len & blksize_mask) {
> > > > +   error = -EINVAL;
> > > > +   goto out_unlock;
> > > > +   }
> > > > +
> > > > +   /* Check for wrap through zero */
> > > > +   if (inode->i_size + len > inode->i_sb->s_maxbytes) {
> > > > +   error = -EFBIG;
> > > > +   goto out_unlock;
> > > > +   }
> > > > +
> > > > +   /* Offset should be less than i_size */
> > > > +   if (offset >= i_size_read(inode)) {
> > > > +   error = -EINVAL;
> > > > +   goto out_unlock;
> > > > +   }
> > > > +
> > > > +   /*
> > > > +* The first thing we do is to expand file to
> > > > +* avoid data loss if there is error while shifting
> > > > +*/
> > > > +   iattr.ia_valid = ATTR_SIZE;
> > > > +   iattr.ia_size = i_size_read(inode) + len;
> > > > +   error = xfs_setattr_size(ip, );
> > > > +   if (error)
> > > > +   goto out_unlock;
> > >
> > > I don't necessarily know that it's problematic to do the setattr before
> > > the bmap fixup. We'll have a chance for partial completion of this
> > > operation either way. But I'm not a fan of the code duplication here.
> > > This also still skips the time update in the event of insert space
> > > failure, though perhaps that's not such a big deal if we're returning an
> > > error.
> > >
> > > I think it would be better to leave things organized as before and
> > > introduce an error2 variable and a  or some such parameter to
> > > xfs_insert_file_space() that initializes to 0 and returns the number of
> > > record shifts. The caller can then decide whether it's appropriate to
> > > break out immediately or do the inode size update and return the error.
> > >
> > > Perhaps not the cleanest thing in the world, but also not the first
> > > place we would use 'error2' to manage error priorities (grep around for
> > > it)...
> > Yes, Right. I also thought such sequence at first. But we should consider
> > sudden power off and unplug device case during shifting extent.
> > While we are in the middle of shifitng extents and if there is sudden
> > power failure user can still think that data is lost as we won't get any
> > chance to 

Re: [PATCH v2 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate

2014-05-12 Thread Brian Foster
On Mon, May 12, 2014 at 06:42:37PM +0900, Namjae Jeon wrote:
> 
> > > +xfs_bmap_split_extent(
> > > + struct xfs_inode*ip,
> > > + xfs_fileoff_t   split_fsb,
> > > + xfs_extnum_t*split_ext)
> > > +{
> > > + struct xfs_mount*mp = ip->i_mount;
> > > + struct xfs_trans*tp;
> > > + struct xfs_bmap_freefree_list;
> > > + xfs_fsblock_t   firstfsb;
> > > + int committed;
> > > + int error;
> > > +
> > > + tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
> > > + error = xfs_trans_reserve(tp, _RES(mp)->tr_write,
> > > + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
> > > +
> > > + if (error) {
> > > + /*
> > > +  * Free the transaction structure.
> > > +  */
> > > + ASSERT(XFS_FORCED_SHUTDOWN(mp));
> > 
> Hi, Brian.
> > As in the other patch, we're attempting to reserve fs blocks for the
> > transaction, so ENOSPC is a possibility that I think the assert should
> > accommodate.
> How about removing the ASSERT completely as suggessted by Dave
> in other thread?
> 

Yeah, that works too. If Dave prefers to just remove these asserts
that's fine with me. I just wanted to make sure we aren't adding
spurious asserts for valid failures.

> > 
...
> > > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > > index 97855c5..392b029 100644
> > > --- a/fs/xfs/xfs_file.c
> > > +++ b/fs/xfs/xfs_file.c
> > > @@ -760,7 +760,8 @@ xfs_file_fallocate(
> > >   if (!S_ISREG(inode->i_mode))
> > >   return -EINVAL;
> > >   if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
> > > -  FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
> > > +  FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
> > > +  FALLOC_FL_INSERT_RANGE))
> > >   return -EOPNOTSUPP;
> > >
> > >   xfs_ilock(ip, XFS_IOLOCK_EXCL);
> > > @@ -790,6 +791,40 @@ xfs_file_fallocate(
> > >   error = xfs_collapse_file_space(ip, offset, len);
> > >   if (error)
> > >   goto out_unlock;
> > > + } else if (mode & FALLOC_FL_INSERT_RANGE) {
> > > + unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
> > > + struct iattr iattr;
> > > +
> > > + if (offset & blksize_mask || len & blksize_mask) {
> > > + error = -EINVAL;
> > > + goto out_unlock;
> > > + }
> > > +
> > > + /* Check for wrap through zero */
> > > + if (inode->i_size + len > inode->i_sb->s_maxbytes) {
> > > + error = -EFBIG;
> > > + goto out_unlock;
> > > + }
> > > +
> > > + /* Offset should be less than i_size */
> > > + if (offset >= i_size_read(inode)) {
> > > + error = -EINVAL;
> > > + goto out_unlock;
> > > + }
> > > +
> > > + /*
> > > +  * The first thing we do is to expand file to
> > > +  * avoid data loss if there is error while shifting
> > > +  */
> > > + iattr.ia_valid = ATTR_SIZE;
> > > + iattr.ia_size = i_size_read(inode) + len;
> > > + error = xfs_setattr_size(ip, );
> > > + if (error)
> > > + goto out_unlock;
> > 
> > I don't necessarily know that it's problematic to do the setattr before
> > the bmap fixup. We'll have a chance for partial completion of this
> > operation either way. But I'm not a fan of the code duplication here.
> > This also still skips the time update in the event of insert space
> > failure, though perhaps that's not such a big deal if we're returning an
> > error.
> > 
> > I think it would be better to leave things organized as before and
> > introduce an error2 variable and a  or some such parameter to
> > xfs_insert_file_space() that initializes to 0 and returns the number of
> > record shifts. The caller can then decide whether it's appropriate to
> > break out immediately or do the inode size update and return the error.
> > 
> > Perhaps not the cleanest thing in the world, but also not the first
> > place we would use 'error2' to manage error priorities (grep around for
> > it)...
> Yes, Right. I also thought such sequence at first. But we should consider
> sudden power off and unplug device case during shifting extent.
> While we are in the middle of shifitng extents and if there is sudden
> power failure user can still think that data is lost as we won't get any
> chance to update the file size in these cases.
> Updating file size before the shifitng operation can start will prevent this.
> 
> Thanks.

Hmm, fair point. That seems less critical to me than the general error
sequence, but if we want to handle that case I think we could still fix
the duplication in xfs_file_fallocate(). We could possibly factor out
the common bits (update time and set size) into a helper, or what seems
a bit cleaner on first thought, move the bulk of the (mode &
FALLOC_FL_INSERT_RANGE) block to after the common part. Then the

RE: [PATCH v2 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate

2014-05-12 Thread Namjae Jeon

> > +xfs_bmap_split_extent(
> > +   struct xfs_inode*ip,
> > +   xfs_fileoff_t   split_fsb,
> > +   xfs_extnum_t*split_ext)
> > +{
> > +   struct xfs_mount*mp = ip->i_mount;
> > +   struct xfs_trans*tp;
> > +   struct xfs_bmap_freefree_list;
> > +   xfs_fsblock_t   firstfsb;
> > +   int committed;
> > +   int error;
> > +
> > +   tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
> > +   error = xfs_trans_reserve(tp, _RES(mp)->tr_write,
> > +   XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
> > +
> > +   if (error) {
> > +   /*
> > +* Free the transaction structure.
> > +*/
> > +   ASSERT(XFS_FORCED_SHUTDOWN(mp));
> 
Hi, Brian.
> As in the other patch, we're attempting to reserve fs blocks for the
> transaction, so ENOSPC is a possibility that I think the assert should
> accommodate.
How about removing the ASSERT completely as suggessted by Dave
in other thread?

> 
> > +   xfs_trans_cancel(tp, 0);
> > +   return error;
> > +   }
> > +

> > +
> > +   /*
> > +* Before shifting extent into hole, make sure that the hole
> > +* is large enough to accomodate the shift. This checking has
> > +* to be performed for all except the last extent.
> > +*/
> > +   last_extent = (ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) - 1;
> > +   if (last_extent != *current_ext) {
> > +   xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
> > +   *current_ext + 1), );
> > +   if (startoff + got.br_blockcount > right.br_startoff) {
> > +   error = XFS_ERROR(EINVAL);
> > +   if (error)
> > +   goto del_cursor;
> > +   }
> > +   }
> > +
> > +   /* Check if we can merge 2 adjacent extents */
> > +   if (last_extent != *current_ext &&
> > +   right.br_startoff == startoff + got.br_blockcount &&
> > +   right.br_startblock ==
> > +   got.br_startblock + got.br_blockcount &&
> > +   right.br_state == got.br_state &&
> > +   right.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
> > +   blockcount = right.br_blockcount + got.br_blockcount;
> > +
> > +   /* Make cursor point to the extent we will update */
> 
> The comment could be more clear about what we're doing in this case. For
> example:
> 
> /*
>  * Merge the current extent with the extent to the right. Remove the right
>  * extent, calculate a new block count for the current extent to cover the 
> range
>  * of both and decrement the number of extents in the fork.
>  */
> 
> I'd also move the comment before the blockcount calculation.
okay, I will add it as your suggestion.
> 
> > +   if (cur) {
> > +   error = xfs_bmbt_lookup_eq(cur,
> > +  right.br_startoff,
> > +  right.br_startblock,
> > +  right.br_blockcount,
> > +  );
> > +   if (error)
> > +   goto del_cursor;
> > +   XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> > +   }
> > +
> > +   xfs_iext_remove(ip, *current_ext + 1, 1, 0);
> > +   if (cur) {
> > +   error = xfs_btree_delete(cur, );
> > +   if (error)
> > +   goto del_cursor;
> > +   XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> > +   }
> > +   XFS_IFORK_NEXT_SET(ip, whichfork,
> > +   XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
> > +
> > +   }
> > +
> > +   if (cur) {
> > +   error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
> > +  got.br_startblock,
> > +  got.br_blockcount,
> > +  );
> > +   if (error)
> > +   goto del_cursor;
> > +   XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
> > +   }
> > +
> > +   if (got.br_blockcount < blockcount) {
> > +   xfs_bmbt_set_blockcount(gotp, blockcount);
> > +   got.br_blockcount = blockcount;
> > +   }
> 
> How about just 'if (blockcount)' so the algorithm is clear?
yes, more clear.
> 
> > +
> > +
> > diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> > index 97855c5..392b029 100644
> > --- a/fs/xfs/xfs_file.c
> > +++ 

RE: [PATCH v2 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate

2014-05-12 Thread Namjae Jeon

  +xfs_bmap_split_extent(
  +   struct xfs_inode*ip,
  +   xfs_fileoff_t   split_fsb,
  +   xfs_extnum_t*split_ext)
  +{
  +   struct xfs_mount*mp = ip-i_mount;
  +   struct xfs_trans*tp;
  +   struct xfs_bmap_freefree_list;
  +   xfs_fsblock_t   firstfsb;
  +   int committed;
  +   int error;
  +
  +   tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
  +   error = xfs_trans_reserve(tp, M_RES(mp)-tr_write,
  +   XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
  +
  +   if (error) {
  +   /*
  +* Free the transaction structure.
  +*/
  +   ASSERT(XFS_FORCED_SHUTDOWN(mp));
 
Hi, Brian.
 As in the other patch, we're attempting to reserve fs blocks for the
 transaction, so ENOSPC is a possibility that I think the assert should
 accommodate.
How about removing the ASSERT completely as suggessted by Dave
in other thread?

 
  +   xfs_trans_cancel(tp, 0);
  +   return error;
  +   }
  +

  +
  +   /*
  +* Before shifting extent into hole, make sure that the hole
  +* is large enough to accomodate the shift. This checking has
  +* to be performed for all except the last extent.
  +*/
  +   last_extent = (ifp-if_bytes / sizeof(xfs_bmbt_rec_t)) - 1;
  +   if (last_extent != *current_ext) {
  +   xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
  +   *current_ext + 1), right);
  +   if (startoff + got.br_blockcount  right.br_startoff) {
  +   error = XFS_ERROR(EINVAL);
  +   if (error)
  +   goto del_cursor;
  +   }
  +   }
  +
  +   /* Check if we can merge 2 adjacent extents */
  +   if (last_extent != *current_ext 
  +   right.br_startoff == startoff + got.br_blockcount 
  +   right.br_startblock ==
  +   got.br_startblock + got.br_blockcount 
  +   right.br_state == got.br_state 
  +   right.br_blockcount + got.br_blockcount = MAXEXTLEN) {
  +   blockcount = right.br_blockcount + got.br_blockcount;
  +
  +   /* Make cursor point to the extent we will update */
 
 The comment could be more clear about what we're doing in this case. For
 example:
 
 /*
  * Merge the current extent with the extent to the right. Remove the right
  * extent, calculate a new block count for the current extent to cover the 
 range
  * of both and decrement the number of extents in the fork.
  */
 
 I'd also move the comment before the blockcount calculation.
okay, I will add it as your suggestion.
 
  +   if (cur) {
  +   error = xfs_bmbt_lookup_eq(cur,
  +  right.br_startoff,
  +  right.br_startblock,
  +  right.br_blockcount,
  +  i);
  +   if (error)
  +   goto del_cursor;
  +   XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
  +   }
  +
  +   xfs_iext_remove(ip, *current_ext + 1, 1, 0);
  +   if (cur) {
  +   error = xfs_btree_delete(cur, i);
  +   if (error)
  +   goto del_cursor;
  +   XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
  +   }
  +   XFS_IFORK_NEXT_SET(ip, whichfork,
  +   XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
  +
  +   }
  +
  +   if (cur) {
  +   error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
  +  got.br_startblock,
  +  got.br_blockcount,
  +  i);
  +   if (error)
  +   goto del_cursor;
  +   XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
  +   }
  +
  +   if (got.br_blockcount  blockcount) {
  +   xfs_bmbt_set_blockcount(gotp, blockcount);
  +   got.br_blockcount = blockcount;
  +   }
 
 How about just 'if (blockcount)' so the algorithm is clear?
yes, more clear.
 
  +
  +
  diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
  index 97855c5..392b029 100644
  --- a/fs/xfs/xfs_file.c
  +++ b/fs/xfs/xfs_file.c
  @@ -760,7 +760,8 @@ xfs_file_fallocate(
  if (!S_ISREG(inode-i_mode))
  return -EINVAL;
  if (mode  ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
  -

Re: [PATCH v2 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate

2014-05-12 Thread Brian Foster
On Mon, May 12, 2014 at 06:42:37PM +0900, Namjae Jeon wrote:
 
   +xfs_bmap_split_extent(
   + struct xfs_inode*ip,
   + xfs_fileoff_t   split_fsb,
   + xfs_extnum_t*split_ext)
   +{
   + struct xfs_mount*mp = ip-i_mount;
   + struct xfs_trans*tp;
   + struct xfs_bmap_freefree_list;
   + xfs_fsblock_t   firstfsb;
   + int committed;
   + int error;
   +
   + tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
   + error = xfs_trans_reserve(tp, M_RES(mp)-tr_write,
   + XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
   +
   + if (error) {
   + /*
   +  * Free the transaction structure.
   +  */
   + ASSERT(XFS_FORCED_SHUTDOWN(mp));
  
 Hi, Brian.
  As in the other patch, we're attempting to reserve fs blocks for the
  transaction, so ENOSPC is a possibility that I think the assert should
  accommodate.
 How about removing the ASSERT completely as suggessted by Dave
 in other thread?
 

Yeah, that works too. If Dave prefers to just remove these asserts
that's fine with me. I just wanted to make sure we aren't adding
spurious asserts for valid failures.

  
...
   diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
   index 97855c5..392b029 100644
   --- a/fs/xfs/xfs_file.c
   +++ b/fs/xfs/xfs_file.c
   @@ -760,7 +760,8 @@ xfs_file_fallocate(
 if (!S_ISREG(inode-i_mode))
 return -EINVAL;
 if (mode  ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
   -  FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
   +  FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
   +  FALLOC_FL_INSERT_RANGE))
 return -EOPNOTSUPP;
  
 xfs_ilock(ip, XFS_IOLOCK_EXCL);
   @@ -790,6 +791,40 @@ xfs_file_fallocate(
 error = xfs_collapse_file_space(ip, offset, len);
 if (error)
 goto out_unlock;
   + } else if (mode  FALLOC_FL_INSERT_RANGE) {
   + unsigned blksize_mask = (1  inode-i_blkbits) - 1;
   + struct iattr iattr;
   +
   + if (offset  blksize_mask || len  blksize_mask) {
   + error = -EINVAL;
   + goto out_unlock;
   + }
   +
   + /* Check for wrap through zero */
   + if (inode-i_size + len  inode-i_sb-s_maxbytes) {
   + error = -EFBIG;
   + goto out_unlock;
   + }
   +
   + /* Offset should be less than i_size */
   + if (offset = i_size_read(inode)) {
   + error = -EINVAL;
   + goto out_unlock;
   + }
   +
   + /*
   +  * The first thing we do is to expand file to
   +  * avoid data loss if there is error while shifting
   +  */
   + iattr.ia_valid = ATTR_SIZE;
   + iattr.ia_size = i_size_read(inode) + len;
   + error = xfs_setattr_size(ip, iattr);
   + if (error)
   + goto out_unlock;
  
  I don't necessarily know that it's problematic to do the setattr before
  the bmap fixup. We'll have a chance for partial completion of this
  operation either way. But I'm not a fan of the code duplication here.
  This also still skips the time update in the event of insert space
  failure, though perhaps that's not such a big deal if we're returning an
  error.
  
  I think it would be better to leave things organized as before and
  introduce an error2 variable and a nrshifts or some such parameter to
  xfs_insert_file_space() that initializes to 0 and returns the number of
  record shifts. The caller can then decide whether it's appropriate to
  break out immediately or do the inode size update and return the error.
  
  Perhaps not the cleanest thing in the world, but also not the first
  place we would use 'error2' to manage error priorities (grep around for
  it)...
 Yes, Right. I also thought such sequence at first. But we should consider
 sudden power off and unplug device case during shifting extent.
 While we are in the middle of shifitng extents and if there is sudden
 power failure user can still think that data is lost as we won't get any
 chance to update the file size in these cases.
 Updating file size before the shifitng operation can start will prevent this.
 
 Thanks.

Hmm, fair point. That seems less critical to me than the general error
sequence, but if we want to handle that case I think we could still fix
the duplication in xfs_file_fallocate(). We could possibly factor out
the common bits (update time and set size) into a helper, or what seems
a bit cleaner on first thought, move the bulk of the (mode 
FALLOC_FL_INSERT_RANGE) block to after the common part. Then the
function looks something like this:

...
xfs_ilock();

/* pre-inode fixup ops */
if (mode  ...) {
...
} else if (mode  FALLOC_FL_INSERT_RANGE) {
/* comment as to what's going on here :) */

  

RE: [PATCH v2 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate

2014-05-12 Thread Namjae Jeon
 
 On Mon, May 12, 2014 at 06:42:37PM +0900, Namjae Jeon wrote:
 
+xfs_bmap_split_extent(
+   struct xfs_inode*ip,
+   xfs_fileoff_t   split_fsb,
+   xfs_extnum_t*split_ext)
+{
+   struct xfs_mount*mp = ip-i_mount;
+   struct xfs_trans*tp;
+   struct xfs_bmap_freefree_list;
+   xfs_fsblock_t   firstfsb;
+   int committed;
+   int error;
+
+   tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+   error = xfs_trans_reserve(tp, M_RES(mp)-tr_write,
+   XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
+
+   if (error) {
+   /*
+* Free the transaction structure.
+*/
+   ASSERT(XFS_FORCED_SHUTDOWN(mp));
  
  Hi, Brian.
   As in the other patch, we're attempting to reserve fs blocks for the
   transaction, so ENOSPC is a possibility that I think the assert should
   accommodate.
  How about removing the ASSERT completely as suggessted by Dave
  in other thread?
 
 
 Yeah, that works too. If Dave prefers to just remove these asserts
 that's fine with me. I just wanted to make sure we aren't adding
 spurious asserts for valid failures.
Okay.
 
  
 ...
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 97855c5..392b029 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -760,7 +760,8 @@ xfs_file_fallocate(
if (!S_ISREG(inode-i_mode))
return -EINVAL;
if (mode  ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
+FALLOC_FL_INSERT_RANGE))
return -EOPNOTSUPP;
   
xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -790,6 +791,40 @@ xfs_file_fallocate(
error = xfs_collapse_file_space(ip, offset, len);
if (error)
goto out_unlock;
+   } else if (mode  FALLOC_FL_INSERT_RANGE) {
+   unsigned blksize_mask = (1  inode-i_blkbits) - 1;
+   struct iattr iattr;
+
+   if (offset  blksize_mask || len  blksize_mask) {
+   error = -EINVAL;
+   goto out_unlock;
+   }
+
+   /* Check for wrap through zero */
+   if (inode-i_size + len  inode-i_sb-s_maxbytes) {
+   error = -EFBIG;
+   goto out_unlock;
+   }
+
+   /* Offset should be less than i_size */
+   if (offset = i_size_read(inode)) {
+   error = -EINVAL;
+   goto out_unlock;
+   }
+
+   /*
+* The first thing we do is to expand file to
+* avoid data loss if there is error while shifting
+*/
+   iattr.ia_valid = ATTR_SIZE;
+   iattr.ia_size = i_size_read(inode) + len;
+   error = xfs_setattr_size(ip, iattr);
+   if (error)
+   goto out_unlock;
  
   I don't necessarily know that it's problematic to do the setattr before
   the bmap fixup. We'll have a chance for partial completion of this
   operation either way. But I'm not a fan of the code duplication here.
   This also still skips the time update in the event of insert space
   failure, though perhaps that's not such a big deal if we're returning an
   error.
  
   I think it would be better to leave things organized as before and
   introduce an error2 variable and a nrshifts or some such parameter to
   xfs_insert_file_space() that initializes to 0 and returns the number of
   record shifts. The caller can then decide whether it's appropriate to
   break out immediately or do the inode size update and return the error.
  
   Perhaps not the cleanest thing in the world, but also not the first
   place we would use 'error2' to manage error priorities (grep around for
   it)...
  Yes, Right. I also thought such sequence at first. But we should consider
  sudden power off and unplug device case during shifting extent.
  While we are in the middle of shifitng extents and if there is sudden
  power failure user can still think that data is lost as we won't get any
  chance to update the file size in these cases.
  Updating file size before the shifitng operation can start will prevent 
  this.
 
  Thanks.
 
 Hmm, fair point. That seems less critical to me than the general error
 sequence, but if we want to handle that case I think we could still fix
 the duplication in xfs_file_fallocate(). We could possibly factor out
 the common bits (update 

Re: [PATCH v2 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate

2014-05-09 Thread Brian Foster
On Thu, May 08, 2014 at 07:26:16PM +0900, Namjae Jeon wrote:
> This patch implements fallocate's FALLOC_FL_INSERT_RANGE for XFS.
> 
> 1) Make sure that both offset and len are block size aligned.
> 2) Update the i_size of inode by len bytes.
> 3) Compute the file's logical block number against offset. If the computed
>block number is not the starting block of the extent, split the extent
>such that the block number is the starting block of the extent.
> 4) Shift all the extents which are lying bewteen [offset, last allocated 
> extent]
>towards right by len bytes. This step will make a hole of len bytes
>at offset.
> 5) Allocate unwritten extents for the hole created in step 4.
> 
> Signed-off-by: Namjae Jeon 
> Signed-off-by: Ashish Sangwan 
> ---
>  fs/xfs/xfs_bmap.c  | 372 
> -
>  fs/xfs/xfs_bmap.h  |   9 +-
>  fs/xfs/xfs_bmap_util.c | 129 -
>  fs/xfs/xfs_bmap_util.h |   2 +
>  fs/xfs/xfs_file.c  |  37 -
>  fs/xfs/xfs_trace.h |   1 +
>  6 files changed, 545 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
> index 1ff0da6..e24aa14 100644
> --- a/fs/xfs/xfs_bmap.c
> +++ b/fs/xfs/xfs_bmap.c
> @@ -5419,7 +5419,7 @@ error0:
>   * into, this will be considered invalid operation and we abort immediately.
>   */
>  int
> -xfs_bmap_shift_extents(
> +xfs_bmap_shift_extents_left(
>   struct xfs_trans*tp,
>   struct xfs_inode*ip,
>   int *done,
> @@ -5449,7 +5449,7 @@ xfs_bmap_shift_extents(
>   (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
>XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
>mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
> - XFS_ERROR_REPORT("xfs_bmap_shift_extents",
> + XFS_ERROR_REPORT("xfs_bmap_shift_extents_left",
>XFS_ERRLEVEL_LOW, mp);
>   return XFS_ERROR(EFSCORRUPTED);
>   }
> @@ -5606,3 +5606,371 @@ del_cursor:
>   xfs_trans_log_inode(tp, ip, logflags);
>   return error;
>  }
> +
> +/*
> + * Splits an extent into two extents at split_fsb block that it is
> + * the first block of the current_ext. @current_ext is a target extent
> + * to be splitted. @split_fsb is a block where the extents is spliited.
> + * If split_fsb lies in a hole or the first block of extents, just return 0.
> + */
> +STATIC int
> +xfs_bmap_split_extent_at(
> + struct xfs_trans*tp,
> + struct xfs_inode*ip,
> + xfs_fileoff_t   split_fsb,
> + xfs_extnum_t*current_ext,
> + xfs_fsblock_t   *firstfsb,
> + struct xfs_bmap_free*free_list)
> +{
> + int whichfork = XFS_DATA_FORK;
> + struct xfs_btree_cur*cur;
> + struct xfs_bmbt_rec_host*gotp;
> + struct xfs_bmbt_irecgot;
> + struct xfs_bmbt_irecnew; /* splitted extent */
> + struct xfs_mount*mp = ip->i_mount;
> + struct xfs_ifork*ifp;
> + xfs_fsblock_t   gotblkcnt; /* new block count for got */
> + int error = 0;
> + int logflags;
> + int i = 0;
> +
> + if (unlikely(XFS_TEST_ERROR(
> + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
> +  XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
> +  mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
> + XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
> +  XFS_ERRLEVEL_LOW, mp);
> + return XFS_ERROR(EFSCORRUPTED);
> + }
> +
> + if (XFS_FORCED_SHUTDOWN(mp))
> + return XFS_ERROR(EIO);
> +
> + ASSERT(current_ext != NULL);
> +
> + ifp = XFS_IFORK_PTR(ip, whichfork);
> + if (!(ifp->if_flags & XFS_IFEXTENTS)) {
> + /* Read in all the extents */
> + error = xfs_iread_extents(tp, ip, whichfork);
> + if (error)
> + return error;
> + }
> +
> + gotp = xfs_iext_bno_to_ext(ifp, split_fsb, current_ext);
> + /*
> +  * gotp can be null in 2 cases: 1) if there are no extents
> +  * or 2) split_fsb lies in a hole beyond which there are
> +  * no extents. Either way, we are done.
> +  */
> + if (!gotp)
> + return 0;
> +
> + xfs_bmbt_get_all(gotp, );
> +
> + /*
> +  * Check split_fsb lies in a hole or the start boundary offset
> +  * of the extent.
> +  */
> + if (got.br_startoff >= split_fsb)
> + return 0;
> +
> + gotblkcnt = split_fsb - got.br_startoff;
> + new.br_startoff = split_fsb;
> + new.br_startblock = got.br_startblock + gotblkcnt;
> + new.br_blockcount = got.br_blockcount - gotblkcnt;
> + new.br_state = got.br_state;
> 

Re: [PATCH v2 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate

2014-05-09 Thread Brian Foster
On Thu, May 08, 2014 at 07:26:16PM +0900, Namjae Jeon wrote:
 This patch implements fallocate's FALLOC_FL_INSERT_RANGE for XFS.
 
 1) Make sure that both offset and len are block size aligned.
 2) Update the i_size of inode by len bytes.
 3) Compute the file's logical block number against offset. If the computed
block number is not the starting block of the extent, split the extent
such that the block number is the starting block of the extent.
 4) Shift all the extents which are lying bewteen [offset, last allocated 
 extent]
towards right by len bytes. This step will make a hole of len bytes
at offset.
 5) Allocate unwritten extents for the hole created in step 4.
 
 Signed-off-by: Namjae Jeon namjae.j...@samsung.com
 Signed-off-by: Ashish Sangwan a.sang...@samsung.com
 ---
  fs/xfs/xfs_bmap.c  | 372 
 -
  fs/xfs/xfs_bmap.h  |   9 +-
  fs/xfs/xfs_bmap_util.c | 129 -
  fs/xfs/xfs_bmap_util.h |   2 +
  fs/xfs/xfs_file.c  |  37 -
  fs/xfs/xfs_trace.h |   1 +
  6 files changed, 545 insertions(+), 5 deletions(-)
 
 diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
 index 1ff0da6..e24aa14 100644
 --- a/fs/xfs/xfs_bmap.c
 +++ b/fs/xfs/xfs_bmap.c
 @@ -5419,7 +5419,7 @@ error0:
   * into, this will be considered invalid operation and we abort immediately.
   */
  int
 -xfs_bmap_shift_extents(
 +xfs_bmap_shift_extents_left(
   struct xfs_trans*tp,
   struct xfs_inode*ip,
   int *done,
 @@ -5449,7 +5449,7 @@ xfs_bmap_shift_extents(
   (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS 
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
 - XFS_ERROR_REPORT(xfs_bmap_shift_extents,
 + XFS_ERROR_REPORT(xfs_bmap_shift_extents_left,
XFS_ERRLEVEL_LOW, mp);
   return XFS_ERROR(EFSCORRUPTED);
   }
 @@ -5606,3 +5606,371 @@ del_cursor:
   xfs_trans_log_inode(tp, ip, logflags);
   return error;
  }
 +
 +/*
 + * Splits an extent into two extents at split_fsb block that it is
 + * the first block of the current_ext. @current_ext is a target extent
 + * to be splitted. @split_fsb is a block where the extents is spliited.
 + * If split_fsb lies in a hole or the first block of extents, just return 0.
 + */
 +STATIC int
 +xfs_bmap_split_extent_at(
 + struct xfs_trans*tp,
 + struct xfs_inode*ip,
 + xfs_fileoff_t   split_fsb,
 + xfs_extnum_t*current_ext,
 + xfs_fsblock_t   *firstfsb,
 + struct xfs_bmap_free*free_list)
 +{
 + int whichfork = XFS_DATA_FORK;
 + struct xfs_btree_cur*cur;
 + struct xfs_bmbt_rec_host*gotp;
 + struct xfs_bmbt_irecgot;
 + struct xfs_bmbt_irecnew; /* splitted extent */
 + struct xfs_mount*mp = ip-i_mount;
 + struct xfs_ifork*ifp;
 + xfs_fsblock_t   gotblkcnt; /* new block count for got */
 + int error = 0;
 + int logflags;
 + int i = 0;
 +
 + if (unlikely(XFS_TEST_ERROR(
 + (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS 
 +  XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
 +  mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
 + XFS_ERROR_REPORT(xfs_bmap_split_extent_at,
 +  XFS_ERRLEVEL_LOW, mp);
 + return XFS_ERROR(EFSCORRUPTED);
 + }
 +
 + if (XFS_FORCED_SHUTDOWN(mp))
 + return XFS_ERROR(EIO);
 +
 + ASSERT(current_ext != NULL);
 +
 + ifp = XFS_IFORK_PTR(ip, whichfork);
 + if (!(ifp-if_flags  XFS_IFEXTENTS)) {
 + /* Read in all the extents */
 + error = xfs_iread_extents(tp, ip, whichfork);
 + if (error)
 + return error;
 + }
 +
 + gotp = xfs_iext_bno_to_ext(ifp, split_fsb, current_ext);
 + /*
 +  * gotp can be null in 2 cases: 1) if there are no extents
 +  * or 2) split_fsb lies in a hole beyond which there are
 +  * no extents. Either way, we are done.
 +  */
 + if (!gotp)
 + return 0;
 +
 + xfs_bmbt_get_all(gotp, got);
 +
 + /*
 +  * Check split_fsb lies in a hole or the start boundary offset
 +  * of the extent.
 +  */
 + if (got.br_startoff = split_fsb)
 + return 0;
 +
 + gotblkcnt = split_fsb - got.br_startoff;
 + new.br_startoff = split_fsb;
 + new.br_startblock = got.br_startblock + gotblkcnt;
 + new.br_blockcount = got.br_blockcount - gotblkcnt;
 + new.br_state = got.br_state;
 +
 + /* We are going to change core inode */
 + logflags = XFS_ILOG_CORE;
 +
 + 

[PATCH v2 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate

2014-05-08 Thread Namjae Jeon
This patch implements fallocate's FALLOC_FL_INSERT_RANGE for XFS.

1) Make sure that both offset and len are block size aligned.
2) Update the i_size of inode by len bytes.
3) Compute the file's logical block number against offset. If the computed
   block number is not the starting block of the extent, split the extent
   such that the block number is the starting block of the extent.
4) Shift all the extents which are lying bewteen [offset, last allocated extent]
   towards right by len bytes. This step will make a hole of len bytes
   at offset.
5) Allocate unwritten extents for the hole created in step 4.

Signed-off-by: Namjae Jeon 
Signed-off-by: Ashish Sangwan 
---
 fs/xfs/xfs_bmap.c  | 372 -
 fs/xfs/xfs_bmap.h  |   9 +-
 fs/xfs/xfs_bmap_util.c | 129 -
 fs/xfs/xfs_bmap_util.h |   2 +
 fs/xfs/xfs_file.c  |  37 -
 fs/xfs/xfs_trace.h |   1 +
 6 files changed, 545 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 1ff0da6..e24aa14 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5419,7 +5419,7 @@ error0:
  * into, this will be considered invalid operation and we abort immediately.
  */
 int
-xfs_bmap_shift_extents(
+xfs_bmap_shift_extents_left(
struct xfs_trans*tp,
struct xfs_inode*ip,
int *done,
@@ -5449,7 +5449,7 @@ xfs_bmap_shift_extents(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
-   XFS_ERROR_REPORT("xfs_bmap_shift_extents",
+   XFS_ERROR_REPORT("xfs_bmap_shift_extents_left",
 XFS_ERRLEVEL_LOW, mp);
return XFS_ERROR(EFSCORRUPTED);
}
@@ -5606,3 +5606,371 @@ del_cursor:
xfs_trans_log_inode(tp, ip, logflags);
return error;
 }
+
+/*
+ * Splits an extent into two extents at split_fsb block that it is
+ * the first block of the current_ext. @current_ext is a target extent
+ * to be splitted. @split_fsb is a block where the extents is spliited.
+ * If split_fsb lies in a hole or the first block of extents, just return 0.
+ */
+STATIC int
+xfs_bmap_split_extent_at(
+   struct xfs_trans*tp,
+   struct xfs_inode*ip,
+   xfs_fileoff_t   split_fsb,
+   xfs_extnum_t*current_ext,
+   xfs_fsblock_t   *firstfsb,
+   struct xfs_bmap_free*free_list)
+{
+   int whichfork = XFS_DATA_FORK;
+   struct xfs_btree_cur*cur;
+   struct xfs_bmbt_rec_host*gotp;
+   struct xfs_bmbt_irecgot;
+   struct xfs_bmbt_irecnew; /* splitted extent */
+   struct xfs_mount*mp = ip->i_mount;
+   struct xfs_ifork*ifp;
+   xfs_fsblock_t   gotblkcnt; /* new block count for got */
+   int error = 0;
+   int logflags;
+   int i = 0;
+
+   if (unlikely(XFS_TEST_ERROR(
+   (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+   XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
+XFS_ERRLEVEL_LOW, mp);
+   return XFS_ERROR(EFSCORRUPTED);
+   }
+
+   if (XFS_FORCED_SHUTDOWN(mp))
+   return XFS_ERROR(EIO);
+
+   ASSERT(current_ext != NULL);
+
+   ifp = XFS_IFORK_PTR(ip, whichfork);
+   if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+   /* Read in all the extents */
+   error = xfs_iread_extents(tp, ip, whichfork);
+   if (error)
+   return error;
+   }
+
+   gotp = xfs_iext_bno_to_ext(ifp, split_fsb, current_ext);
+   /*
+* gotp can be null in 2 cases: 1) if there are no extents
+* or 2) split_fsb lies in a hole beyond which there are
+* no extents. Either way, we are done.
+*/
+   if (!gotp)
+   return 0;
+
+   xfs_bmbt_get_all(gotp, );
+
+   /*
+* Check split_fsb lies in a hole or the start boundary offset
+* of the extent.
+*/
+   if (got.br_startoff >= split_fsb)
+   return 0;
+
+   gotblkcnt = split_fsb - got.br_startoff;
+   new.br_startoff = split_fsb;
+   new.br_startblock = got.br_startblock + gotblkcnt;
+   new.br_blockcount = got.br_blockcount - gotblkcnt;
+   new.br_state = got.br_state;
+
+   /* We are going to change core inode */
+   logflags = XFS_ILOG_CORE;
+
+   if (ifp->if_flags & XFS_IFBROOT) {
+   cur = xfs_bmbt_init_cursor(mp, 

[PATCH v2 2/10] xfs: Add support FALLOC_FL_INSERT_RANGE for fallocate

2014-05-08 Thread Namjae Jeon
This patch implements fallocate's FALLOC_FL_INSERT_RANGE for XFS.

1) Make sure that both offset and len are block size aligned.
2) Update the i_size of inode by len bytes.
3) Compute the file's logical block number against offset. If the computed
   block number is not the starting block of the extent, split the extent
   such that the block number is the starting block of the extent.
4) Shift all the extents which are lying bewteen [offset, last allocated extent]
   towards right by len bytes. This step will make a hole of len bytes
   at offset.
5) Allocate unwritten extents for the hole created in step 4.

Signed-off-by: Namjae Jeon namjae.j...@samsung.com
Signed-off-by: Ashish Sangwan a.sang...@samsung.com
---
 fs/xfs/xfs_bmap.c  | 372 -
 fs/xfs/xfs_bmap.h  |   9 +-
 fs/xfs/xfs_bmap_util.c | 129 -
 fs/xfs/xfs_bmap_util.h |   2 +
 fs/xfs/xfs_file.c  |  37 -
 fs/xfs/xfs_trace.h |   1 +
 6 files changed, 545 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 1ff0da6..e24aa14 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5419,7 +5419,7 @@ error0:
  * into, this will be considered invalid operation and we abort immediately.
  */
 int
-xfs_bmap_shift_extents(
+xfs_bmap_shift_extents_left(
struct xfs_trans*tp,
struct xfs_inode*ip,
int *done,
@@ -5449,7 +5449,7 @@ xfs_bmap_shift_extents(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS 
 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
-   XFS_ERROR_REPORT(xfs_bmap_shift_extents,
+   XFS_ERROR_REPORT(xfs_bmap_shift_extents_left,
 XFS_ERRLEVEL_LOW, mp);
return XFS_ERROR(EFSCORRUPTED);
}
@@ -5606,3 +5606,371 @@ del_cursor:
xfs_trans_log_inode(tp, ip, logflags);
return error;
 }
+
+/*
+ * Splits an extent into two extents at split_fsb block that it is
+ * the first block of the current_ext. @current_ext is a target extent
+ * to be splitted. @split_fsb is a block where the extents is spliited.
+ * If split_fsb lies in a hole or the first block of extents, just return 0.
+ */
+STATIC int
+xfs_bmap_split_extent_at(
+   struct xfs_trans*tp,
+   struct xfs_inode*ip,
+   xfs_fileoff_t   split_fsb,
+   xfs_extnum_t*current_ext,
+   xfs_fsblock_t   *firstfsb,
+   struct xfs_bmap_free*free_list)
+{
+   int whichfork = XFS_DATA_FORK;
+   struct xfs_btree_cur*cur;
+   struct xfs_bmbt_rec_host*gotp;
+   struct xfs_bmbt_irecgot;
+   struct xfs_bmbt_irecnew; /* splitted extent */
+   struct xfs_mount*mp = ip-i_mount;
+   struct xfs_ifork*ifp;
+   xfs_fsblock_t   gotblkcnt; /* new block count for got */
+   int error = 0;
+   int logflags;
+   int i = 0;
+
+   if (unlikely(XFS_TEST_ERROR(
+   (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS 
+XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+   XFS_ERROR_REPORT(xfs_bmap_split_extent_at,
+XFS_ERRLEVEL_LOW, mp);
+   return XFS_ERROR(EFSCORRUPTED);
+   }
+
+   if (XFS_FORCED_SHUTDOWN(mp))
+   return XFS_ERROR(EIO);
+
+   ASSERT(current_ext != NULL);
+
+   ifp = XFS_IFORK_PTR(ip, whichfork);
+   if (!(ifp-if_flags  XFS_IFEXTENTS)) {
+   /* Read in all the extents */
+   error = xfs_iread_extents(tp, ip, whichfork);
+   if (error)
+   return error;
+   }
+
+   gotp = xfs_iext_bno_to_ext(ifp, split_fsb, current_ext);
+   /*
+* gotp can be null in 2 cases: 1) if there are no extents
+* or 2) split_fsb lies in a hole beyond which there are
+* no extents. Either way, we are done.
+*/
+   if (!gotp)
+   return 0;
+
+   xfs_bmbt_get_all(gotp, got);
+
+   /*
+* Check split_fsb lies in a hole or the start boundary offset
+* of the extent.
+*/
+   if (got.br_startoff = split_fsb)
+   return 0;
+
+   gotblkcnt = split_fsb - got.br_startoff;
+   new.br_startoff = split_fsb;
+   new.br_startblock = got.br_startblock + gotblkcnt;
+   new.br_blockcount = got.br_blockcount - gotblkcnt;
+   new.br_state = got.br_state;
+
+   /* We are going to change core inode */
+   logflags = XFS_ILOG_CORE;
+
+   if (ifp-if_flags  XFS_IFBROOT) {
+