With all three patches I have corruption....

diff --git a/fs/buffer.c b/fs/buffer.c
index d1f1b54..263f88e 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2834,7 +2834,7 @@ int try_to_free_buffers(struct page *pag
        int ret = 0;
 
        BUG_ON(!PageLocked(page));
-       if (PageWriteback(page))
+       if (PageDirty(page) || PageWriteback(page))
                return 0;
 
        if (mapping == NULL) {          /* can this still happen? */
@@ -2845,22 +2845,6 @@ int try_to_free_buffers(struct page *pag
        spin_lock(&mapping->private_lock);
        ret = drop_buffers(page, &buffers_to_free);
        spin_unlock(&mapping->private_lock);
-       if (ret) {
-               /*
-                * If the filesystem writes its buffers by hand (eg ext3)
-                * then we can have clean buffers against a dirty page.  We
-                * clean the page here; otherwise later reattachment of buffers
-                * could encounter a non-uptodate page, which is unresolvable.
-                * This only applies in the rare case where try_to_free_buffers
-                * succeeds but the page is not freed.
-                *
-                * Also, during truncate, discard_buffer will have marked all
-                * the page's buffers clean.  We discover that here and clean
-                * the page also.
-                */
-               if (test_clear_page_dirty(page))
-                       task_io_account_cancelled_write(PAGE_CACHE_SIZE);
-       }
 out:
        if (buffers_to_free) {
                struct buffer_head *bh = buffers_to_free;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ed2c223..4f4cd13 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -176,7 +176,7 @@ static int hugetlbfs_commit_write(struct
 
 static void truncate_huge_page(struct page *page)
 {
-       clear_page_dirty(page);
+       cancel_dirty_page(page, /* No IO accounting for huge pages? */0);
        ClearPageUptodate(page);
        remove_from_page_cache(page);
        put_page(page);
diff --git a/include/asm-generic/pgtable.h
b/include/asm-generic/pgtable.h
index 9d774d0..8879f1d 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -61,31 +61,6 @@ ({                                                           
        \
 })
 #endif
 
-#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
-#define ptep_test_and_clear_dirty(__vma, __address, __ptep)            \
-({                                                                     \
-       pte_t __pte = *__ptep;                                          \
-       int r = 1;                                                      \
-       if (!pte_dirty(__pte))                                          \
-               r = 0;                                                  \
-       else                                                            \
-               set_pte_at((__vma)->vm_mm, (__address), (__ptep),       \
-                          pte_mkclean(__pte));                         \
-       r;                                                              \
-})
-#endif
-
-#ifndef __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
-#define ptep_clear_flush_dirty(__vma, __address, __ptep)               \
-({                                                                     \
-       int __dirty;                                                    \
-       __dirty = ptep_test_and_clear_dirty(__vma, __address, __ptep);  \
-       if (__dirty)                                                    \
-               flush_tlb_page(__vma, __address);                       \
-       __dirty;                                                        \
-})
-#endif
-
 #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
 #define ptep_get_and_clear(__mm, __address, __ptep)                    \
 ({                                                                     \
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index e6a4723..b61d6f9 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -300,18 +300,20 @@ do {                                                      
                \
        flush_tlb_page(vma, address);                                   \
 } while (0)
 
-#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
-#define ptep_clear_flush_dirty(vma, address, ptep)                     \
-({                                                                     \
-       int __dirty;                                                    \
-       __dirty = pte_dirty(*(ptep));                                   \
-       if (__dirty) {                                                  \
-               clear_bit(_PAGE_BIT_DIRTY, &(ptep)->pte_low);           \
-               pte_update_defer((vma)->vm_mm, (address), (ptep));      \
-               flush_tlb_page(vma, address);                           \
-       }                                                               \
-       __dirty;                                                        \
-})
+/*
+ * "ptep_exchange()" can be used to atomically change a set of
+ * page table protection bits, returning the old ones (the dirty
+ * and accessed bits in particular, since they are set by hw).
+ *
+ * "ptep_flush_dirty()" then returns the dirty status of the
+ * page (on x86-64, we just look at the dirty bit in the returned
+ * pte, but some other architectures have the dirty bits in
+ * other places than the page tables).
+ */
+#define ptep_exchange(vma, address, ptep, old, new) \
+       (old).pte_low = xchg(&(ptep)->pte_low, (new).pte_low);
+#define ptep_flush_dirty(vma, address, ptep, old) \
+       pte_dirty(old)
 
 #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
 #define ptep_clear_flush_young(vma, address, ptep)                     \
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index 59901c6..07754b5 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -283,12 +283,20 @@ static inline pte_t pte_clrhuge(pte_t pt
 
 struct vm_area_struct;
 
-static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
-{
-       if (!pte_dirty(*ptep))
-               return 0;
-       return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte);
-}
+/*
+ * "ptep_exchange()" can be used to atomically change a set of
+ * page table protection bits, returning the old ones (the dirty
+ * and accessed bits in particular, since they are set by hw).
+ *
+ * "ptep_flush_dirty()" then returns the dirty status of the
+ * page (on x86-64, we just look at the dirty bit in the returned
+ * pte, but some other architectures have the dirty bits in
+ * other places than the page tables).
+ */
+#define ptep_exchange(vma, address, ptep, old, new) \
+       (old).pte = xchg(&(ptep)->pte, (new).pte);
+#define ptep_flush_dirty(vma, address, ptep, old) \
+       pte_dirty(old)
 
 static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
 {
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4830a3b..350878a 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -253,15 +253,11 @@ #define ClearPageUncached(page)   clear_bi
 
 struct page;   /* forward declaration */
 
-int test_clear_page_dirty(struct page *page);
+extern void cancel_dirty_page(struct page *page, unsigned int
account_size);
+
 int test_clear_page_writeback(struct page *page);
 int test_set_page_writeback(struct page *page);
 
-static inline void clear_page_dirty(struct page *page)
-{
-       test_clear_page_dirty(page);
-}
-
 static inline void set_page_writeback(struct page *page)
 {
        test_set_page_writeback(page);
diff --git a/mm/memory.c b/mm/memory.c
index c00bac6..79cecab 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1842,6 +1842,33 @@ void unmap_mapping_range(struct address_
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
+static void check_last_page(struct address_space *mapping, loff_t size)
+{
+       pgoff_t index;
+       unsigned int offset;
+       struct page *page;
+
+       if (!mapping)
+               return;
+       offset = size & ~PAGE_MASK;
+       if (!offset)
+               return;
+       index = size >> PAGE_SHIFT;
+       page = find_lock_page(mapping, index);
+       if (page) {
+               unsigned int check = 0;
+               unsigned char *kaddr = kmap_atomic(page, KM_USER0);
+               do {
+                       check += kaddr[offset++];
+               } while (offset < PAGE_SIZE);
+               kunmap_atomic(kaddr,KM_USER0);
+               unlock_page(page);
+               page_cache_release(page);
+               if (check)
+                       printk("%s: BADNESS: truncate check %u\n", 
current->comm, check);
+       }
+}
+
 /**
  * vmtruncate - unmap mappings "freed" by truncate() syscall
  * @inode: inode of the file used
@@ -1875,6 +1902,7 @@ do_expand:
                goto out_sig;
        if (offset > inode->i_sb->s_maxbytes)
                goto out_big;
+       check_last_page(mapping, inode->i_size);
        i_size_write(inode, offset);
 
 out_truncate:
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 237107c..b3a198c 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -845,38 +845,6 @@ int set_page_dirty_lock(struct page *pag
 EXPORT_SYMBOL(set_page_dirty_lock);
 
 /*
- * Clear a page's dirty flag, while caring for dirty memory
accounting. 
- * Returns true if the page was previously dirty.
- */
-int test_clear_page_dirty(struct page *page)
-{
-       struct address_space *mapping = page_mapping(page);
-       unsigned long flags;
-
-       if (!mapping)
-               return TestClearPageDirty(page);
-
-       write_lock_irqsave(&mapping->tree_lock, flags);
-       if (TestClearPageDirty(page)) {
-               radix_tree_tag_clear(&mapping->page_tree,
-                               page_index(page), PAGECACHE_TAG_DIRTY);
-               write_unlock_irqrestore(&mapping->tree_lock, flags);
-               /*
-                * We can continue to use `mapping' here because the
-                * page is locked, which pins the address_space
-                */
-               if (mapping_cap_account_dirty(mapping)) {
-                       page_mkclean(page);
-                       dec_zone_page_state(page, NR_FILE_DIRTY);
-               }
-               return 1;
-       }
-       write_unlock_irqrestore(&mapping->tree_lock, flags);
-       return 0;
-}
-EXPORT_SYMBOL(test_clear_page_dirty);
-
-/*
  * Clear a page's dirty flag, while caring for dirty memory accounting.
  * Returns true if the page was previously dirty.
  *
diff --git a/mm/rmap.c b/mm/rmap.c
index d8a842a..a028803 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -432,7 +432,7 @@ static int page_mkclean_one(struct page 
 {
        struct mm_struct *mm = vma->vm_mm;
        unsigned long address;
-       pte_t *pte, entry;
+       pte_t *ptep;
        spinlock_t *ptl;
        int ret = 0;
 
@@ -440,22 +440,24 @@ static int page_mkclean_one(struct page 
        if (address == -EFAULT)
                goto out;
 
-       pte = page_check_address(page, mm, address, &ptl);
-       if (!pte)
-               goto out;
-
-       if (!pte_dirty(*pte) && !pte_write(*pte))
-               goto unlock;
-
-       entry = ptep_get_and_clear(mm, address, pte);
-       entry = pte_mkclean(entry);
-       entry = pte_wrprotect(entry);
-       ptep_establish(vma, address, pte, entry);
-       lazy_mmu_prot_update(entry);
-       ret = 1;
-
-unlock:
-       pte_unmap_unlock(pte, ptl);
+       ptep = page_check_address(page, mm, address, &ptl);
+       if (ptep) {
+               pte_t old, new;
+
+               old = *ptep;
+               new = pte_wrprotect(pte_mkclean(old));
+               if (!pte_same(old, new)) {
+                       for (;;) {
+                               flush_cache_page(vma, address, 
page_to_pfn(page));
+                               ptep_exchange(vma, address, ptep, old, new);
+                               if (pte_same(old, new))
+                                       break;
+                               ret |= ptep_flush_dirty(vma, address, ptep, 
old);
+                               flush_tlb_page(vma, address);
+                       }
+               }
+               pte_unmap_unlock(pte, ptl);
+       }
 out:
        return ret;
 }
diff --git a/mm/truncate.c b/mm/truncate.c
index 9bfb8e8..4a38dd1 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -51,6 +51,22 @@ static inline void truncate_partial_page
                do_invalidatepage(page, partial);
 }
 
+void cancel_dirty_page(struct page *page, unsigned int account_size)
+{
+       /* If we're cancelling the page, it had better not be mapped any more
*/
+       if (page_mapped(page)) {
+               static unsigned int warncount;
+
+               WARN_ON(++warncount < 5);
+       }
+               
+       if (TestClearPageDirty(page) && account_size &&
+                       mapping_cap_account_dirty(page->mapping)) {
+               dec_zone_page_state(page, NR_FILE_DIRTY);
+               task_io_account_cancelled_write(account_size);
+       }
+}
+
 /*
  * If truncate cannot remove the fs-private metadata from the page, the
page
  * becomes anonymous.  It will be left on the LRU and may even be
mapped into
@@ -67,11 +83,11 @@ truncate_complete_page(struct address_sp
        if (page->mapping != mapping)
                return;
 
+       cancel_dirty_page(page, PAGE_CACHE_SIZE);
+
        if (PagePrivate(page))
                do_invalidatepage(page, 0);
 
-       if (test_clear_page_dirty(page))
-               task_io_account_cancelled_write(PAGE_CACHE_SIZE);
        ClearPageUptodate(page);
        ClearPageMappedToDisk(page);
        remove_from_page_cache(page);
@@ -350,7 +366,6 @@ int invalidate_inode_pages2_range(struct
                for (i = 0; !ret && i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
                        pgoff_t page_index;
-                       int was_dirty;
 
                        lock_page(page);
                        if (page->mapping != mapping) {
@@ -386,12 +401,8 @@ int invalidate_inode_pages2_range(struct
                                          PAGE_CACHE_SIZE, 0);
                                }
                        }
-                       was_dirty = test_clear_page_dirty(page);
-                       if (!invalidate_complete_page2(mapping, page)) {
-                               if (was_dirty)
-                                       set_page_dirty(page);
+                       if (!invalidate_complete_page2(mapping, page))
                                ret = -EIO;
-                       }
                        unlock_page(page);
                }
                pagevec_release(&pvec);



On Fri, 2006-12-22 at 02:17 -0800, Andrew Morton wrote:
> On Fri, 22 Dec 2006 11:00:04 +0100
> Martin Michlmayr <[EMAIL PROTECTED]> wrote:
> 
> > > - if (TestClearPageDirty(page) && account_size)
> > > + if (TestClearPageDirty(page) && account_size) {
> > > +         dec_zone_page_state(page, NR_FILE_DIRTY);
> > >           task_io_account_cancelled_write(account_size);
> > > + }
> > 
> > This hunk (on top of git from about 2 days ago and your latest patch)
> > results in the installer hanging right at the start. 
> 
> You'll need this also:
> 
> From: Andrew Morton <[EMAIL PROTECTED]>
> 
> Only (un)account for IO and page-dirtying for devices which have real backing
> store (ie: not tmpfs or ramdisks).
> 
> Cc: "David S. Miller" <[EMAIL PROTECTED]>
> Cc: Linus Torvalds <[EMAIL PROTECTED]>
> Signed-off-by: Andrew Morton <[EMAIL PROTECTED]>
> ---
> 
>  mm/truncate.c |    3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff -puN mm/truncate.c~truncate-dirty-memory-accounting-fix mm/truncate.c
> --- a/mm/truncate.c~truncate-dirty-memory-accounting-fix
> +++ a/mm/truncate.c
> @@ -60,7 +60,8 @@ void cancel_dirty_page(struct page *page
>               WARN_ON(++warncount < 5);
>       }
>               
> -     if (TestClearPageDirty(page) && account_size) {
> +     if (TestClearPageDirty(page) && account_size &&
> +                     mapping_cap_account_dirty(page->mapping)) {
>               dec_zone_page_state(page, NR_FILE_DIRTY);
>               task_io_account_cancelled_write(account_size);
>       }
> _
> 

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to