Re: [PATCH 6/7] arch/x86: Add array variants for setting memory to wc caching.

2010-03-18 Thread Pauli Nieminen
On Thu, Mar 18, 2010 at 1:52 AM, Dave Airlie airl...@gmail.com wrote:
 On Thu, Mar 18, 2010 at 6:50 AM, Pauli Nieminen suok...@gmail.com wrote:
 Setting single memory pages at a time to wc takes a lot time in cache flush. 
 To
 reduce number of cache flush set_pages_array_wc and set_memory_array_wc can 
 be
 used to set multiple pages to WC with single cache flush.

 I don't think this is correct, I've cc'ed Suresh and Venki who looked
 at this before,
 but I think there is an array in the x86 code that stores the state
 and it only has a
 single bit in it, it needs to be expanded in order to at WC support.

 Dave.


This worked for me but there might be some hw specific cases when it
doesn't work. (all page act like wc cached ones when running memcpy
test on gtt bo)

This is only minor improvemnet for corner cases. Pool refills general
don't happen in normal use except for 3D. For others cases number of
pages was fairly static so pool code can avoid refills. Of course
there might be some corner case that I didn't know to test.
Even for them pool refills happened about once a second in average.
But allocation were happening in small bursts so this should avoid
frame rate hit in case of a lot of allocation in very short time.


 This improves allocation performance for wc cached pages in drm/ttm.

 Signed-off-by: Pauli Nieminen suok...@gmail.com
 ---
  arch/x86/include/asm/cacheflush.h |    2 +
  arch/x86/mm/pageattr.c            |   53 
 +++-
  2 files changed, 47 insertions(+), 8 deletions(-)

 diff --git a/arch/x86/include/asm/cacheflush.h 
 b/arch/x86/include/asm/cacheflush.h
 index 634c40a..d92d63a 100644
 --- a/arch/x86/include/asm/cacheflush.h
 +++ b/arch/x86/include/asm/cacheflush.h
 @@ -139,9 +139,11 @@ int set_memory_np(unsigned long addr, int numpages);
  int set_memory_4k(unsigned long addr, int numpages);

  int set_memory_array_uc(unsigned long *addr, int addrinarray);
 +int set_memory_array_wc(unsigned long *addr, int addrinarray);
  int set_memory_array_wb(unsigned long *addr, int addrinarray);

  int set_pages_array_uc(struct page **pages, int addrinarray);
 +int set_pages_array_wc(struct page **pages, int addrinarray);
  int set_pages_array_wb(struct page **pages, int addrinarray);

  /*
 diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
 index cf07c26..0c98a75 100644
 --- a/arch/x86/mm/pageattr.c
 +++ b/arch/x86/mm/pageattr.c
 @@ -997,7 +997,8 @@ out_err:
  }
  EXPORT_SYMBOL(set_memory_uc);

 -int set_memory_array_uc(unsigned long *addr, int addrinarray)
 +int _set_memory_array(unsigned long *addr, int addrinarray,
 +               unsigned long new_type)
  {
        int i, j;
        int ret;
 @@ -1007,13 +1008,19 @@ int set_memory_array_uc(unsigned long *addr, int 
 addrinarray)
         */
        for (i = 0; i  addrinarray; i++) {
                ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + 
 PAGE_SIZE,
 -                                       _PAGE_CACHE_UC_MINUS, NULL);
 +                                       new_type, NULL);
                if (ret)
                        goto out_free;
        }

        ret = change_page_attr_set(addr, addrinarray,
                                    __pgprot(_PAGE_CACHE_UC_MINUS), 1);
 +
 +       if (!ret  new_type == _PAGE_CACHE_WC)
 +               ret = change_page_attr_set_clr(addr, addrinarray,
 +                                              __pgprot(_PAGE_CACHE_WC),
 +                                              __pgprot(_PAGE_CACHE_MASK),
 +                                              0, CPA_ARRAY, NULL);
        if (ret)
                goto out_free;

 @@ -1025,8 +1032,19 @@ out_free:

        return ret;
  }
 +
 +int set_memory_array_uc(unsigned long *addr, int addrinarray)
 +{
 +       return _set_memory_array(addr, addrinarray, _PAGE_CACHE_UC_MINUS);
 +}
  EXPORT_SYMBOL(set_memory_array_uc);

 +int set_memory_array_wc(unsigned long *addr, int addrinarray)
 +{
 +       return _set_memory_array(addr, addrinarray, _PAGE_CACHE_WC);
 +}
 +EXPORT_SYMBOL(set_memory_array_wc);
 +
  int _set_memory_wc(unsigned long addr, int numpages)
  {
        int ret;
 @@ -1153,26 +1171,34 @@ int set_pages_uc(struct page *page, int numpages)
  }
  EXPORT_SYMBOL(set_pages_uc);

 -int set_pages_array_uc(struct page **pages, int addrinarray)
 +static int _set_pages_array(struct page **pages, int addrinarray,
 +               unsigned long new_type)
  {
        unsigned long start;
        unsigned long end;
        int i;
        int free_idx;
 +       int ret;

        for (i = 0; i  addrinarray; i++) {
                if (PageHighMem(pages[i]))
                        continue;
                start = page_to_pfn(pages[i])  PAGE_SHIFT;
                end = start + PAGE_SIZE;
 -               if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
 +               if (reserve_memtype(start, end, new_type, NULL))
                        goto err_out;
        }

 -       if 

Re: [PATCH 6/7] arch/x86: Add array variants for setting memory to wc caching.

2010-03-18 Thread Pauli Nieminen
On Thu, Mar 18, 2010 at 9:29 PM, Suresh Siddha
suresh.b.sid...@intel.com wrote:
 On Thu, 2010-03-18 at 02:41 -0700, Pauli Nieminen wrote:
 On Thu, Mar 18, 2010 at 1:52 AM, Dave Airlie airl...@gmail.com wrote:
  On Thu, Mar 18, 2010 at 6:50 AM, Pauli Nieminen suok...@gmail.com wrote:
  Setting single memory pages at a time to wc takes a lot time in cache 
  flush. To
  reduce number of cache flush set_pages_array_wc and set_memory_array_wc 
  can be
  used to set multiple pages to WC with single cache flush.
 
  I don't think this is correct, I've cc'ed Suresh and Venki who looked
  at this before,
  but I think there is an array in the x86 code that stores the state
  and it only has a
  single bit in it, it needs to be expanded in order to at WC support.
 

 hmm, I thought we had the array variants for the WC already. I don't
 think there is any explicit reason for not having it. Correcting Venki's
 address, to see if he has anything to add.

  +       ret = cpa_set_pages_array(pages, addrinarray,
  +                       __pgprot(_PAGE_CACHE_UC_MINUS));
  +       if (!ret  new_type == _PAGE_CACHE_WC)
  +               ret = change_page_attr_set_clr(NULL, addrinarray,
  +                                              __pgprot(_PAGE_CACHE_WC),
  +                                              __pgprot(_PAGE_CACHE_MASK),
  +                                              0, CPA_PAGES_ARRAY, pages);

 Pauli, Is there any reason for not using cpa_set_pages_array() here
 aswell?


cpa_set_pages_array is setting mask parameter to zero while
_set_memory_wc is passing the mask parameter. I didn't look deep
enough to the code to know why _set_memory_wc works that way.

 thanks,
 suresh



--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [PATCH 6/7] arch/x86: Add array variants for setting memory to wc caching.

2010-03-18 Thread Suresh Siddha
On Thu, 2010-03-18 at 02:41 -0700, Pauli Nieminen wrote:
 On Thu, Mar 18, 2010 at 1:52 AM, Dave Airlie airl...@gmail.com wrote:
  On Thu, Mar 18, 2010 at 6:50 AM, Pauli Nieminen suok...@gmail.com wrote:
  Setting single memory pages at a time to wc takes a lot time in cache 
  flush. To
  reduce number of cache flush set_pages_array_wc and set_memory_array_wc 
  can be
  used to set multiple pages to WC with single cache flush.
 
  I don't think this is correct, I've cc'ed Suresh and Venki who looked
  at this before,
  but I think there is an array in the x86 code that stores the state
  and it only has a
  single bit in it, it needs to be expanded in order to at WC support.
 

hmm, I thought we had the array variants for the WC already. I don't
think there is any explicit reason for not having it. Correcting Venki's
address, to see if he has anything to add.

  +   ret = cpa_set_pages_array(pages, addrinarray,
  +   __pgprot(_PAGE_CACHE_UC_MINUS));
  +   if (!ret  new_type == _PAGE_CACHE_WC)
  +   ret = change_page_attr_set_clr(NULL, addrinarray,
  +  __pgprot(_PAGE_CACHE_WC),
  +  __pgprot(_PAGE_CACHE_MASK),
  +  0, CPA_PAGES_ARRAY, pages);

Pauli, Is there any reason for not using cpa_set_pages_array() here
aswell?

thanks,
suresh


--
Download Intel#174; Parallel Studio Eval
Try the new software tools for yourself. Speed compiling, find bugs
proactively, and fine-tune applications for parallel performance.
See why Intel Parallel Studio got high marks during beta.
http://p.sf.net/sfu/intel-sw-dev
--
___
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Re: [PATCH 6/7] arch/x86: Add array variants for setting memory to wc caching.

2010-03-17 Thread Dave Airlie
On Thu, Mar 18, 2010 at 6:50 AM, Pauli Nieminen suok...@gmail.com wrote:
 Setting single memory pages at a time to wc takes a lot time in cache flush. 
 To
 reduce number of cache flush set_pages_array_wc and set_memory_array_wc can be
 used to set multiple pages to WC with single cache flush.

I don't think this is correct, I've cc'ed Suresh and Venki who looked
at this before,
but I think there is an array in the x86 code that stores the state
and it only has a
single bit in it, it needs to be expanded in order to at WC support.

Dave.


 This improves allocation performance for wc cached pages in drm/ttm.

 Signed-off-by: Pauli Nieminen suok...@gmail.com
 ---
  arch/x86/include/asm/cacheflush.h |    2 +
  arch/x86/mm/pageattr.c            |   53 +++-
  2 files changed, 47 insertions(+), 8 deletions(-)

 diff --git a/arch/x86/include/asm/cacheflush.h 
 b/arch/x86/include/asm/cacheflush.h
 index 634c40a..d92d63a 100644
 --- a/arch/x86/include/asm/cacheflush.h
 +++ b/arch/x86/include/asm/cacheflush.h
 @@ -139,9 +139,11 @@ int set_memory_np(unsigned long addr, int numpages);
  int set_memory_4k(unsigned long addr, int numpages);

  int set_memory_array_uc(unsigned long *addr, int addrinarray);
 +int set_memory_array_wc(unsigned long *addr, int addrinarray);
  int set_memory_array_wb(unsigned long *addr, int addrinarray);

  int set_pages_array_uc(struct page **pages, int addrinarray);
 +int set_pages_array_wc(struct page **pages, int addrinarray);
  int set_pages_array_wb(struct page **pages, int addrinarray);

  /*
 diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
 index cf07c26..0c98a75 100644
 --- a/arch/x86/mm/pageattr.c
 +++ b/arch/x86/mm/pageattr.c
 @@ -997,7 +997,8 @@ out_err:
  }
  EXPORT_SYMBOL(set_memory_uc);

 -int set_memory_array_uc(unsigned long *addr, int addrinarray)
 +int _set_memory_array(unsigned long *addr, int addrinarray,
 +               unsigned long new_type)
  {
        int i, j;
        int ret;
 @@ -1007,13 +1008,19 @@ int set_memory_array_uc(unsigned long *addr, int 
 addrinarray)
         */
        for (i = 0; i  addrinarray; i++) {
                ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
 -                                       _PAGE_CACHE_UC_MINUS, NULL);
 +                                       new_type, NULL);
                if (ret)
                        goto out_free;
        }

        ret = change_page_attr_set(addr, addrinarray,
                                    __pgprot(_PAGE_CACHE_UC_MINUS), 1);
 +
 +       if (!ret  new_type == _PAGE_CACHE_WC)
 +               ret = change_page_attr_set_clr(addr, addrinarray,
 +                                              __pgprot(_PAGE_CACHE_WC),
 +                                              __pgprot(_PAGE_CACHE_MASK),
 +                                              0, CPA_ARRAY, NULL);
        if (ret)
                goto out_free;

 @@ -1025,8 +1032,19 @@ out_free:

        return ret;
  }
 +
 +int set_memory_array_uc(unsigned long *addr, int addrinarray)
 +{
 +       return _set_memory_array(addr, addrinarray, _PAGE_CACHE_UC_MINUS);
 +}
  EXPORT_SYMBOL(set_memory_array_uc);

 +int set_memory_array_wc(unsigned long *addr, int addrinarray)
 +{
 +       return _set_memory_array(addr, addrinarray, _PAGE_CACHE_WC);
 +}
 +EXPORT_SYMBOL(set_memory_array_wc);
 +
  int _set_memory_wc(unsigned long addr, int numpages)
  {
        int ret;
 @@ -1153,26 +1171,34 @@ int set_pages_uc(struct page *page, int numpages)
  }
  EXPORT_SYMBOL(set_pages_uc);

 -int set_pages_array_uc(struct page **pages, int addrinarray)
 +static int _set_pages_array(struct page **pages, int addrinarray,
 +               unsigned long new_type)
  {
        unsigned long start;
        unsigned long end;
        int i;
        int free_idx;
 +       int ret;

        for (i = 0; i  addrinarray; i++) {
                if (PageHighMem(pages[i]))
                        continue;
                start = page_to_pfn(pages[i])  PAGE_SHIFT;
                end = start + PAGE_SIZE;
 -               if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
 +               if (reserve_memtype(start, end, new_type, NULL))
                        goto err_out;
        }

 -       if (cpa_set_pages_array(pages, addrinarray,
 -                       __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) {
 -               return 0; /* Success */
 -       }
 +       ret = cpa_set_pages_array(pages, addrinarray,
 +                       __pgprot(_PAGE_CACHE_UC_MINUS));
 +       if (!ret  new_type == _PAGE_CACHE_WC)
 +               ret = change_page_attr_set_clr(NULL, addrinarray,
 +                                              __pgprot(_PAGE_CACHE_WC),
 +                                              __pgprot(_PAGE_CACHE_MASK),
 +                                              0, CPA_PAGES_ARRAY, pages);
 +       if (ret)
 +               goto err_out;
 +       return 0; /* Success */
  err_out:
        free_idx = i;