Nick Piggin <[EMAIL PROTECTED]> writes:

> Rework the cont filesystem helpers so that generic_cont_expand does the
> actual work of expanding the file. cont_prepare_write then calls this
> routine if expanding is needed, and retries. Also solves the problem
> where cont_prepare_write would previously hold the target page locked
> while doing not-very-nice things like locking other pages.
>
> Means that zero-length prepare/commit_write pairs are no longer needed
> as an overloaded directive to extend the file, thus cont should operate
> better within the new deadlock-free buffered write code.
>
> Converts fat over to the new cont scheme.
>
> Signed-off-by: Nick Piggin <[EMAIL PROTECTED]>
>
>
> Index: linux-2.6/fs/buffer.c
> ===================================================================
> --- linux-2.6.orig/fs/buffer.c
> +++ linux-2.6/fs/buffer.c
> @@ -2004,19 +2004,20 @@ int block_read_full_page(struct page *pa
>       return 0;
>  }
>  
> -/* utility function for filesystems that need to do work on expanding
> - * truncates.  Uses prepare/commit_write to allow the filesystem to
> - * deal with the hole.  
> +/*
> + * Utility function for filesystems that need to do work on expanding
> + * truncates. For moronic filesystems that do not allow holes in file.
>   */
> -static int __generic_cont_expand(struct inode *inode, loff_t size,
> -                              pgoff_t index, unsigned int offset)
> +int generic_cont_expand(struct inode *inode, loff_t size, loff_t *bytes,
> +                                             get_block_t *get_block)
>  {
>       struct address_space *mapping = inode->i_mapping;
> +     unsigned long blocksize = 1 << inode->i_blkbits;
>       struct page *page;
>       unsigned long limit;
> -     int err;
> +     int status;
>  
> -     err = -EFBIG;
> +     status = -EFBIG;
>          limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
>       if (limit != RLIM_INFINITY && size > (loff_t)limit) {
>               send_sig(SIGXFSZ, current, 0);
> @@ -2025,146 +2026,83 @@ static int __generic_cont_expand(struct 
>       if (size > inode->i_sb->s_maxbytes)
>               goto out;
>  
> -     err = -ENOMEM;
> -     page = grab_cache_page(mapping, index);
> -     if (!page)
> -             goto out;
> -     err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
> -     if (err) {
> -             /*
> -              * ->prepare_write() may have instantiated a few blocks
> -              * outside i_size.  Trim these off again.
> -              */
> -             unlock_page(page);
> -             page_cache_release(page);
> -             vmtruncate(inode, inode->i_size);
> -             goto out;
> -     }
> +     status = 0;
>  
> -     err = mapping->a_ops->commit_write(NULL, page, offset, offset);
> +     while (*bytes < size) {
> +             unsigned int zerofrom;
> +             unsigned int zeroto;
> +             void *kaddr;
> +             pgoff_t pgpos;
> +
> +             pgpos = *bytes >> PAGE_CACHE_SHIFT;
> +             page = grab_cache_page(mapping, pgpos);
> +             if (!page) {
> +                     status = -ENOMEM;
> +                     break;
> +             }
> +             /* we might sleep */
> +             if (*bytes >> PAGE_CACHE_SHIFT != pgpos)
> +                     goto unlock;
>  
> -     unlock_page(page);
> -     page_cache_release(page);
> -     if (err > 0)
> -             err = 0;
> -out:
> -     return err;
> -}
> +             zerofrom = *bytes & ~PAGE_CACHE_MASK;
> +             if (zerofrom & (blocksize-1))
> +                     *bytes = (*bytes + blocksize-1) & (blocksize-1);
>  
> -int generic_cont_expand(struct inode *inode, loff_t size)
> -{
> -     pgoff_t index;
> -     unsigned int offset;
> +             zeroto = PAGE_CACHE_SIZE;
>  
> -     offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */
> +             status = __block_prepare_write(inode, page, zerofrom,
> +                                             zeroto, get_block);
> +             if (status)
> +                     goto unlock;
> +             kaddr = kmap_atomic(page, KM_USER0);
> +             memset(kaddr+zerofrom, 0, PAGE_CACHE_SIZE-zerofrom);
> +             flush_dcache_page(page);
> +             kunmap_atomic(kaddr, KM_USER0);
> +             status = __block_commit_write(inode, page, zerofrom, zeroto);
>  
> -     /* ugh.  in prepare/commit_write, if from==to==start of block, we
> -     ** skip the prepare.  make sure we never send an offset for the start
> -     ** of a block
> -     */
> -     if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
> -             /* caller must handle this extra byte. */
> -             offset++;
> +unlock:
> +             unlock_page(page);
> +             page_cache_release(page);
> +             if (status) {
> +                     BUG_ON(status == AOP_TRUNCATED_PAGE);
> +                     break;
> +             }
>       }
> -     index = size >> PAGE_CACHE_SHIFT;
>  
> -     return __generic_cont_expand(inode, size, index, offset);
> -}
> -
> -int generic_cont_expand_simple(struct inode *inode, loff_t size)
> -{
> -     loff_t pos = size - 1;
> -     pgoff_t index = pos >> PAGE_CACHE_SHIFT;
> -     unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1;
> +     /*
> +      * No need to use i_size_read() here, the i_size
> +      * cannot change under us because we hold i_mutex.
> +      */
> +     if (size > inode->i_size) {
> +             i_size_write(inode, size);
> +             mark_inode_dirty(inode);
> +     }
>  
> -     /* prepare/commit_write can handle even if from==to==start of block. */
> -     return __generic_cont_expand(inode, size, index, offset);
> +out:
> +     return status;
>  }

quick look. Doesn't this break reiserfs? IIRC, the reiserfs is using
it for another reason. I was also working for this, but I lost the
thread of this, sorry.

I found some another users (affs, hfs, hfsplus). Those seem have same
problem, but probably those also can use this...

What do you think?
-- 
OGAWA Hirofumi <[EMAIL PROTECTED]>



Signed-off-by: OGAWA Hirofumi <[EMAIL PROTECTED]>
---

 fs/buffer.c                 |   59 +++++++++++++++++---------------------------
 fs/fat/file.c               |    2 -
 include/linux/buffer_head.h |    1 
 3 files changed, 24 insertions(+), 38 deletions(-)

diff -puN fs/buffer.c~generic_cont_expand-avoid-zero-size fs/buffer.c
--- linux-2.6/fs/buffer.c~generic_cont_expand-avoid-zero-size   2006-11-13 
01:42:01.000000000 +0900
+++ linux-2.6-hirofumi/fs/buffer.c      2006-11-13 02:16:20.000000000 +0900
@@ -2004,18 +2004,24 @@ int block_read_full_page(struct page *pa
        return 0;
 }
 
-/* utility function for filesystems that need to do work on expanding
+/*
+ * utility function for filesystems that need to do work on expanding
  * truncates.  Uses prepare/commit_write to allow the filesystem to
  * deal with the hole.  
  */
-static int __generic_cont_expand(struct inode *inode, loff_t size,
-                                pgoff_t index, unsigned int offset)
+int generic_cont_expand(struct inode *inode, loff_t size)
 {
        struct address_space *mapping = inode->i_mapping;
+       loff_t pos = inode->i_size;
        struct page *page;
        unsigned long limit;
+       pgoff_t index;
+       unsigned int from, to;
+       void *kaddr;
        int err;
 
+       WARN_ON(pos >= size);
+
        err = -EFBIG;
         limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
        if (limit != RLIM_INFINITY && size > (loff_t)limit) {
@@ -2025,11 +2031,18 @@ static int __generic_cont_expand(struct 
        if (size > inode->i_sb->s_maxbytes)
                goto out;
 
+       index = (size - 1) >> PAGE_CACHE_SHIFT;
+       to = size - ((loff_t)index << PAGE_CACHE_SHIFT);
+       if (index != (pos >> PAGE_CACHE_SHIFT))
+               from = 0;
+       else
+               from = pos & (PAGE_CACHE_SIZE - 1);
+
        err = -ENOMEM;
        page = grab_cache_page(mapping, index);
        if (!page)
                goto out;
-       err = mapping->a_ops->prepare_write(NULL, page, offset, offset);
+       err = mapping->a_ops->prepare_write(NULL, page, from, to);
        if (err) {
                /*
                 * ->prepare_write() may have instantiated a few blocks
@@ -2041,7 +2054,12 @@ static int __generic_cont_expand(struct 
                goto out;
        }
 
-       err = mapping->a_ops->commit_write(NULL, page, offset, offset);
+       kaddr = kmap_atomic(page, KM_USER0);
+       memset(kaddr + from, 0, to - from);
+       flush_dcache_page(page);
+       kunmap_atomic(kaddr, KM_USER0);
+
+       err = mapping->a_ops->commit_write(NULL, page, from, to);
 
        unlock_page(page);
        page_cache_release(page);
@@ -2051,36 +2069,6 @@ out:
        return err;
 }
 
-int generic_cont_expand(struct inode *inode, loff_t size)
-{
-       pgoff_t index;
-       unsigned int offset;
-
-       offset = (size & (PAGE_CACHE_SIZE - 1)); /* Within page */
-
-       /* ugh.  in prepare/commit_write, if from==to==start of block, we
-       ** skip the prepare.  make sure we never send an offset for the start
-       ** of a block
-       */
-       if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
-               /* caller must handle this extra byte. */
-               offset++;
-       }
-       index = size >> PAGE_CACHE_SHIFT;
-
-       return __generic_cont_expand(inode, size, index, offset);
-}
-
-int generic_cont_expand_simple(struct inode *inode, loff_t size)
-{
-       loff_t pos = size - 1;
-       pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-       unsigned int offset = (pos & (PAGE_CACHE_SIZE - 1)) + 1;
-
-       /* prepare/commit_write can handle even if from==to==start of block. */
-       return __generic_cont_expand(inode, size, index, offset);
-}
-
 /*
  * For moronic filesystems that do not allow holes in file.
  * We may have to extend the file.
@@ -3032,7 +3020,6 @@ EXPORT_SYMBOL(fsync_bdev);
 EXPORT_SYMBOL(generic_block_bmap);
 EXPORT_SYMBOL(generic_commit_write);
 EXPORT_SYMBOL(generic_cont_expand);
-EXPORT_SYMBOL(generic_cont_expand_simple);
 EXPORT_SYMBOL(init_buffer);
 EXPORT_SYMBOL(invalidate_bdev);
 EXPORT_SYMBOL(ll_rw_block);
diff -puN include/linux/buffer_head.h~generic_cont_expand-avoid-zero-size 
include/linux/buffer_head.h
--- linux-2.6/include/linux/buffer_head.h~generic_cont_expand-avoid-zero-size   
2006-11-13 01:42:01.000000000 +0900
+++ linux-2.6-hirofumi/include/linux/buffer_head.h      2006-11-13 
01:42:01.000000000 +0900
@@ -202,7 +202,6 @@ int block_prepare_write(struct page*, un
 int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
                                loff_t *);
 int generic_cont_expand(struct inode *inode, loff_t size);
-int generic_cont_expand_simple(struct inode *inode, loff_t size);
 int block_commit_write(struct page *page, unsigned from, unsigned to);
 void block_sync_page(struct page *);
 sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
diff -puN fs/fat/file.c~generic_cont_expand-avoid-zero-size fs/fat/file.c
--- linux-2.6/fs/fat/file.c~generic_cont_expand-avoid-zero-size 2006-11-13 
01:42:01.000000000 +0900
+++ linux-2.6-hirofumi/fs/fat/file.c    2006-11-13 01:42:01.000000000 +0900
@@ -143,7 +143,7 @@ static int fat_cont_expand(struct inode 
        loff_t start = inode->i_size, count = size - inode->i_size;
        int err;
 
-       err = generic_cont_expand_simple(inode, size);
+       err = generic_cont_expand(inode, size);
        if (err)
                goto out;
 
_
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to