Hi Andrew,

Today's linux-next merge of the akpm-current tree got a conflict in:

  fs/ocfs2/aops.c

between commit:

  187372a3b9fa ("direct-io: always call ->end_io if non-NULL")

from the xfs tree and commit:

  8e1fbb488a18 ("ocfs2: fix sparse file & data ordering issue in direct io")

from the akpm-current tree.

I fixed it up (I think - see below) and can carry the fix as necessary
(no action is required).

-- 
Cheers,
Stephen Rothwell

diff --cc fs/ocfs2/aops.c
index 5dcc5f5a842e,3d3952ebe101..000000000000
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@@ -2476,6 -2099,356 +2099,361 @@@ static int ocfs2_write_end(struct file 
        return ret;
  }
  
+ struct ocfs2_dio_write_ctxt { /* direct-io write state, kept in bh->b_private */
+       struct list_head        dw_zero_list;   /* unwritten extents spliced in by ocfs2_dio_get_block() */
+       unsigned                dw_zero_count;  /* incremented once per splice onto dw_zero_list */
+       int                     dw_orphaned;    /* set to 1 once ocfs2_add_inode_to_orphan() succeeded */
+       pid_t                   dw_writer_pid;  /* task_pid_nr(current) of the task that allocated this */
+ };
+ 
+ static struct ocfs2_dio_write_ctxt *
+ ocfs2_dio_alloc_write_ctx(struct buffer_head *bh, int *alloc)
+ {     /* Return the write context attached to @bh, allocating one if needed. */
+       struct ocfs2_dio_write_ctxt *dwc = NULL;
+ 
+       if (bh->b_private)      /* already attached: reuse it, *alloc is left untouched */
+               return bh->b_private;
+ 
+       dwc = kmalloc(sizeof(struct ocfs2_dio_write_ctxt), GFP_NOFS);
+       if (dwc == NULL)        /* allocation failed: caller gets NULL, @bh unchanged */
+               return NULL;
+       INIT_LIST_HEAD(&dwc->dw_zero_list);
+       dwc->dw_zero_count = 0;
+       dwc->dw_orphaned = 0;
+       dwc->dw_writer_pid = task_pid_nr(current);      /* remembered so end_io can tell if it runs in the same task */
+       bh->b_private = dwc;
+       *alloc = 1;     /* tell the caller this call created the context */
+ 
+       return dwc;
+ }
+ 
+ static void ocfs2_dio_free_write_ctx(struct inode *inode,
+                                    struct ocfs2_dio_write_ctxt *dwc)
+ {     /* Hand dwc->dw_zero_list to ocfs2_free_unwritten_list(), then kfree() the context. */
+       ocfs2_free_unwritten_list(inode, &dwc->dw_zero_list);
+       kfree(dwc);
+ }
+ 
+ /*
+  * TODO: Make this into a generic get_blocks function.
+  *
+  * From do_direct_io in direct-io.c:
+  *  "So what we do is to permit the ->get_blocks function to populate
+  *   bh.b_size with the size of IO which is permitted at this offset and
+  *   this i_blkbits."
+  *
+  * This function is called directly from get_more_blocks in direct-io.c.
+  *
+  * called like this: dio->get_blocks(dio->inode, fs_startblk,
+  *                                    fs_count, map_bh, dio->rw == WRITE);
+  *
+  * At most one cluster is mapped per call: the request is clipped to the end
+  * of the cluster that contains @iblock.  A request that lies entirely inside
+  * i_size and resolves to an already mapped, non-new block takes the fast path
+  * through ocfs2_get_block().  Everything else goes through
+  * ocfs2_write_begin_nolock() under ocfs2_inode_lock() and ip_alloc_sem, with
+  * a write context kept in bh_result->b_private for the end_io handler.
+  *
+  * Returns 0 on success; every failure is reported to the caller as -EIO.
+  */
+ static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
+                              struct buffer_head *bh_result, int create)
+ {
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       struct ocfs2_inode_info *oi = OCFS2_I(inode);
+       struct ocfs2_write_ctxt *wc;
+       struct ocfs2_write_cluster_desc *desc = NULL;
+       struct ocfs2_dio_write_ctxt *dwc = NULL;
+       struct buffer_head *di_bh = NULL;
+       u64 p_blkno;
+       /* Widen before shifting so a narrow sector_t cannot overflow. */
+       loff_t pos = (loff_t)iblock << inode->i_sb->s_blocksize_bits;
+       unsigned len, total_len = bh_result->b_size;
+       int ret = 0, first_get_block = 0;
+ 
+       /* Clip the request to the remainder of the current cluster. */
+       len = osb->s_clustersize - (pos & (osb->s_clustersize - 1));
+       len = min(total_len, len);
+ 
+       mlog(0, "get block of %lu at %llu:%u req %u\n",
+                       inode->i_ino, (unsigned long long)pos, len, total_len);
+ 
+       /*
+        * Because we need to change file size in ocfs2_dio_end_io_write(), or
+        * we may need to add it to orphan dir. So can not fall to fast path
+        * while file size will be changed.
+        */
+       if (pos + total_len <= i_size_read(inode)) {
+               down_read(&oi->ip_alloc_sem);
+               /* This is the fast path for re-write. */
+               ret = ocfs2_get_block(inode, iblock, bh_result, create);
+ 
+               up_read(&oi->ip_alloc_sem);
+ 
+               if (buffer_mapped(bh_result) &&
+                   !buffer_new(bh_result) &&
+                   ret == 0)
+                       goto out;
+ 
+               /* Clear state set by ocfs2_get_block. */
+               bh_result->b_state = 0;
+       }
+ 
+       /* first_get_block is set only when this call creates the context. */
+       dwc = ocfs2_dio_alloc_write_ctx(bh_result, &first_get_block);
+       if (unlikely(dwc == NULL)) {
+               ret = -ENOMEM;
+               mlog_errno(ret);
+               goto out;
+       }
+ 
+       if (ocfs2_clusters_for_bytes(inode->i_sb, pos + total_len) >
+           ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)) &&
+           !dwc->dw_orphaned) {
+               /*
+                * when we are going to alloc extents beyond file size, add the
+                * inode to orphan dir, so we can recall those spaces when
+                * system crashed during write.
+                */
+               ret = ocfs2_add_inode_to_orphan(osb, inode);
+               if (ret < 0) {
+                       mlog_errno(ret);
+                       goto out;
+               }
+               dwc->dw_orphaned = 1;
+       }
+ 
+       ret = ocfs2_inode_lock(inode, &di_bh, 1);
+       if (ret) {
+               mlog_errno(ret);
+               goto out;
+       }
+ 
+       down_write(&oi->ip_alloc_sem);
+ 
+       /* Tail zeroing / non-sparse expansion is done once per write context. */
+       if (first_get_block) {
+               if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+                       ret = ocfs2_zero_tail(inode, di_bh, pos);
+               else
+                       ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos,
+                                                          total_len, NULL);
+               if (ret < 0) {
+                       mlog_errno(ret);
+                       goto unlock;
+               }
+       }
+ 
+       ret = ocfs2_write_begin_nolock(inode->i_mapping, pos, len,
+                                      OCFS2_WRITE_DIRECT, NULL,
+                                      (void **)&wc, di_bh, NULL);
+       if (ret) {
+               mlog_errno(ret);
+               goto unlock;
+       }
+ 
+       desc = &wc->w_desc[0];
+ 
+       /* Physical block = start of the cluster + block offset inside it. */
+       p_blkno = ocfs2_clusters_to_blocks(inode->i_sb, desc->c_phys);
+       BUG_ON(p_blkno == 0);
+       p_blkno += iblock & (u64)(ocfs2_clusters_to_blocks(inode->i_sb, 1) - 1);
+ 
+       map_bh(bh_result, inode->i_sb, p_blkno);
+       bh_result->b_size = len;
+       if (desc->c_needs_zero)
+               set_buffer_new(bh_result);
+ 
+       /* May sleep in end_io. It should not happen in a irq context. So defer
+        * it to dio work queue. */
+       set_buffer_defer_completion(bh_result);
+ 
+       if (!list_empty(&wc->w_unwritten_list)) {
+               struct ocfs2_unwritten_extent *ue = NULL;
+ 
+               ue = list_first_entry(&wc->w_unwritten_list,
+                                     struct ocfs2_unwritten_extent,
+                                     ue_node);
+               BUG_ON(ue->ue_cpos != desc->c_cpos);
+               /* The physical address may be 0, fill it. */
+               ue->ue_phys = desc->c_phys;
+ 
+               /* Hand the extents over to the dio context for end_io. */
+               list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list);
+               dwc->dw_zero_count++;
+       }
+ 
+       ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc);
+       BUG_ON(ret != len);
+       ret = 0;
+ unlock:
+       up_write(&oi->ip_alloc_sem);
+       ocfs2_inode_unlock(inode, 1);
+       brelse(di_bh);
+ out:
+       /* Any negative error code is collapsed to -EIO for the caller. */
+       if (ret < 0)
+               ret = -EIO;
+       return ret;
+ }
+ 
+ /*
+  * Finish a direct-io write described by @dwc for the range
+  * [@offset, @offset + @bytes).
+  *
+  * Removes the inode from the orphan dir if ocfs2_dio_get_block() put it
+  * there, marks every extent collected on dwc->dw_zero_list as written, and
+  * grows i_size when the write ended beyond it.  @dwc is always freed before
+  * returning, including on the early-exit and error paths.
+  */
+ static void ocfs2_dio_end_io_write(struct inode *inode,
+                                  struct ocfs2_dio_write_ctxt *dwc,
+                                  loff_t offset,
+                                  ssize_t bytes)
+ {
+       struct ocfs2_cached_dealloc_ctxt dealloc;
+       struct ocfs2_extent_tree et;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       struct ocfs2_inode_info *oi = OCFS2_I(inode);
+       struct ocfs2_unwritten_extent *ue = NULL;
+       struct buffer_head *di_bh = NULL;
+       struct ocfs2_dinode *di;
+       struct ocfs2_alloc_context *data_ac = NULL;
+       struct ocfs2_alloc_context *meta_ac = NULL;
+       handle_t *handle = NULL;
+       loff_t end = offset + bytes;
+       int ret = 0, credits = 0, locked = 0;
+ 
+       ocfs2_init_dealloc_ctxt(&dealloc);
+ 
+       /* We do clear unwritten, delete orphan, change i_size here. If neither
+        * of these happen, we can skip all this. */
+       if (list_empty(&dwc->dw_zero_list) &&
+           end <= i_size_read(inode) &&
+           !dwc->dw_orphaned)
+               goto out;
+ 
+       /* ocfs2_file_write_iter will get i_mutex, so we need not lock if we
+        * are in that context. */
+       if (dwc->dw_writer_pid != task_pid_nr(current)) {
+               mutex_lock(&inode->i_mutex);
+               locked = 1;
+       }
+ 
+       ret = ocfs2_inode_lock(inode, &di_bh, 1);
+       if (ret < 0) {
+               mlog_errno(ret);
+               goto out;
+       }
+ 
+       down_write(&oi->ip_alloc_sem);
+ 
+       /* Delete orphan before acquire i_mutex. */
+       if (dwc->dw_orphaned) {
+               BUG_ON(dwc->dw_writer_pid != task_pid_nr(current));
+ 
+               /* Only pass a new size down if the write extended the file. */
+               end = end > i_size_read(inode) ? end : 0;
+ 
+               ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
+                               !!end, end);
+               if (ret < 0)
+                       mlog_errno(ret);
+       }
+ 
+       /*
+        * The on-disk inode lives in the buffer's data, not in the
+        * buffer_head structure itself.
+        */
+       di = (struct ocfs2_dinode *)di_bh->b_data;
+ 
+       ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
+ 
+       ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
+                                   &data_ac, &meta_ac);
+       if (ret) {
+               mlog_errno(ret);
+               goto unlock;
+       }
+ 
+       credits = ocfs2_calc_extend_credits(inode->i_sb, &di->id2.i_list);
+ 
+       handle = ocfs2_start_trans(osb, credits);
+       if (IS_ERR(handle)) {
+               ret = PTR_ERR(handle);
+               mlog_errno(ret);
+               goto unlock;
+       }
+       ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+                                     OCFS2_JOURNAL_ACCESS_WRITE);
+       if (ret) {
+               mlog_errno(ret);
+               goto commit;
+       }
+ 
+       /* Convert each tracked cluster, stopping at the first failure. */
+       list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
+               ret = ocfs2_mark_extent_written(inode, &et, handle,
+                                               ue->ue_cpos, 1,
+                                               ue->ue_phys,
+                                               meta_ac, &dealloc);
+               if (ret < 0) {
+                       mlog_errno(ret);
+                       break;
+               }
+       }
+ 
+       if (end > i_size_read(inode)) {
+               ret = ocfs2_set_inode_size(handle, inode, di_bh, end);
+               if (ret < 0)
+                       mlog_errno(ret);
+       }
+ commit:
+       ocfs2_commit_trans(osb, handle);
+ unlock:
+       up_write(&oi->ip_alloc_sem);
+       ocfs2_inode_unlock(inode, 1);
+       brelse(di_bh);
+ out:
+       if (data_ac)
+               ocfs2_free_alloc_context(data_ac);
+       if (meta_ac)
+               ocfs2_free_alloc_context(meta_ac);
+       ocfs2_run_deallocs(osb, &dealloc);
+       if (locked)
+               mutex_unlock(&inode->i_mutex);
+       ocfs2_dio_free_write_ctx(inode, dwc);
+ }
+ 
+ /*
+  * ocfs2_dio_end_io is called by the dio core when a dio is finished.  We're
+  * particularly interested in the aio/dio case.  We use the rw_lock DLM lock
+  * to protect io on one node from truncation on another.
+  *
+  * A non-NULL @private is handed to ocfs2_dio_end_io_write() before the rw
+  * lock is released.  Always returns 0.
+  *
+  * NOTE(review): the bytes <= 0 early return skips both the @private handling
+  * and the rw unlock below - confirm nothing is leaked or left locked on that
+  * path.
+  */
 -static void ocfs2_dio_end_io(struct kiocb *iocb,
 -                           loff_t offset,
 -                           ssize_t bytes,
 -                           void *private)
++static int ocfs2_dio_end_io(struct kiocb *iocb,
++                          loff_t offset,
++                          ssize_t bytes,
++                          void *private)
+ {
+       struct inode *inode = file_inode(iocb->ki_filp);
+       int level;
+ 
++      if (bytes <= 0)
++              return 0;
++
+       /* this io's submitter should not have unlocked this before we could */
+       BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
+ 
+       if (private)
+               ocfs2_dio_end_io_write(inode, private, offset, bytes);
+ 
+       ocfs2_iocb_clear_rw_locked(iocb);
+ 
+       level = ocfs2_iocb_rw_locked_level(iocb);
+       ocfs2_rw_unlock(inode, level);
++
++      return 0;
+ }
+ 
+ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+                              loff_t offset)
+ {     /* Direct-io entry point: the "return 0" cases below are the buffered-I/O fallbacks. */
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file)->i_mapping->host;
+       struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+       loff_t end = offset + iter->count;      /* first byte past the requested range */
+       get_block_t *get_block;
+ 
+       /*
+        * Fallback to buffered I/O if we see an inode without
+        * extents.
+        */
+       if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
+               return 0;
+ 
+       /* Fallback to buffered I/O if we do not support append dio. */
+       if (end > i_size_read(inode) && !ocfs2_supports_append_dio(osb))
+               return 0;
+ 
+       if (iov_iter_rw(iter) == READ)
+               get_block = ocfs2_get_block;    /* reads use the plain block mapper */
+       else
+               get_block = ocfs2_dio_get_block;        /* writes use the dio-specific mapper defined in this file */
+ 
+       return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+                                   iter, offset, get_block,
+                                   ocfs2_dio_end_io, NULL, 0);
+ }
+ 
  const struct address_space_operations ocfs2_aops = {
        .readpage               = ocfs2_readpage,
        .readpages              = ocfs2_readpages,

Reply via email to