Bring it on!
:-)

On May 19, 2010, at 11:07 AM, Brice Goglin wrote:

> This branch is pretty much ready to merge into trunk for 1.1.
> If somebody doesn't like it, please complain!
> I'll merge by the end of the week otherwise.
> 
> Brice
> 
> 
> 
> 
> On 19/05/2010 17:01, bgog...@osl.iu.edu wrote:
> > Author: bgoglin
> > Date: 2010-05-19 11:01:42 EDT (Wed, 19 May 2010)
> > New Revision: 2107
> > URL: https://svn.open-mpi.org/trac/hwloc/changeset/2107
> >
> > Log:
> > Deoptimize many cpuset routines by using HWLOC_CPUSUBSET_READULONG()
> > all the time instead of manually handling the cases where a ulong index
> > is valid in one input cpuset and/or the other.
> >
> > The performance gain wasn't so huge (0-20%), and the code was waaaaay
> > harder to read/maintain. And things should be much easier to deal
> > with when we add sparse cpuset support.
> > Text files modified:
> >    branches/dyncpusets/src/cpuset.c |   127 
> > ++++++---------------------------------
> >    1 files changed, 22 insertions(+), 105 deletions(-)
> >
> > Modified: branches/dyncpusets/src/cpuset.c
> > ==============================================================================
> > --- branches/dyncpusets/src/cpuset.c  (original)
> > +++ branches/dyncpusets/src/cpuset.c  2010-05-19 11:01:42 EDT (Wed, 19 May 
> > 2010)
> > @@ -553,24 +553,13 @@
> > 
> >  int hwloc_cpuset_isequal (const struct hwloc_cpuset_s *set1, const struct 
> > hwloc_cpuset_s *set2)
> >  {
> > -     unsigned long val;
> >       unsigned i;
> > 
> >       HWLOC__CPUSET_CHECK(set1);
> >       HWLOC__CPUSET_CHECK(set2);
> > 
> > -     for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++)
> > -             if (set1->ulongs[i] != set2->ulongs[i])
> > -                     return 0;
> > -
> > -     val = set1->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > -     for(; i<set2->ulongs_count; i++)
> > -             if (set2->ulongs[i] != val)
> > -                     return 0;
> > -
> > -     val = set2->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > -     for(; i<set1->ulongs_count; i++)
> > -             if (set1->ulongs[i] != val)
> > +     for(i=0; i<set1->ulongs_count || i<set2->ulongs_count; i++)
> > +             if (HWLOC_CPUSUBSET_READULONG(set1, i) != 
> > HWLOC_CPUSUBSET_READULONG(set2, i))
> >                       return 0;
> > 
> >       if (set1->infinite != set2->infinite)
> > @@ -581,26 +570,15 @@
> > 
> >  int hwloc_cpuset_intersects (const struct hwloc_cpuset_s *set1, const 
> > struct hwloc_cpuset_s *set2)
> >  {
> > -     unsigned long val;
> >       unsigned i;
> > 
> >       HWLOC__CPUSET_CHECK(set1);
> >       HWLOC__CPUSET_CHECK(set2);
> > 
> > -     for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++)
> > -             if ((set1->ulongs[i] & set2->ulongs[i]) != 
> > HWLOC_CPUSUBSET_ZERO)
> > +     for(i=0; i<set1->ulongs_count || i<set2->ulongs_count; i++)
> > +             if ((HWLOC_CPUSUBSET_READULONG(set1, i) & 
> > HWLOC_CPUSUBSET_READULONG(set2, i)) != HWLOC_CPUSUBSET_ZERO)
> >                       return 1;
> > 
> > -     val = set1->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > -     for(; i<set2->ulongs_count; i++)
> > -             if ((set2->ulongs[i] & val) != HWLOC_CPUSUBSET_ZERO)
> > -                     return 0;
> > -
> > -     val = set2->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > -     for(; i<set1->ulongs_count; i++)
> > -             if ((set1->ulongs[i] & val) != HWLOC_CPUSUBSET_ZERO)
> > -                     return 0;
> > -
> >       if (set1->infinite && set2->infinite)
> >               return 0;
> > 
> > @@ -614,12 +592,8 @@
> >       HWLOC__CPUSET_CHECK(sub_set);
> >       HWLOC__CPUSET_CHECK(super_set);
> > 
> > -     for(i=0; i<sub_set->ulongs_count && i<super_set->ulongs_count; i++)
> > -       if (super_set->ulongs[i] != (super_set->ulongs[i] | 
> > sub_set->ulongs[i]))
> > -                     return 0;
> > -
> > -     for(; i<sub_set->ulongs_count; i++)
> > -             if (sub_set->ulongs[i] != HWLOC_CPUSUBSET_ZERO && 
> > !super_set->infinite)
> > +     for(i=0; i<sub_set->ulongs_count; i++)
> > +             if (HWLOC_CPUSUBSET_READULONG(super_set, i) != 
> > (HWLOC_CPUSUBSET_READULONG(super_set, i) | 
> > HWLOC_CPUSUBSET_READULONG(sub_set, i)))
> >                       return 0;
> > 
> >       if (sub_set->infinite && !super_set->infinite)
> > @@ -631,8 +605,6 @@
> >  void hwloc_cpuset_or (struct hwloc_cpuset_s *res, const struct 
> > hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2)
> >  {
> >       const struct hwloc_cpuset_s *largest = set1->ulongs_count > 
> > set2->ulongs_count ? set1 : set2;
> > -     const struct hwloc_cpuset_s *smallest = set1->ulongs_count > 
> > set2->ulongs_count ? set2 : set1;
> > -     unsigned long val;
> >       unsigned i;
> > 
> >       HWLOC__CPUSET_CHECK(res);
> > @@ -641,16 +613,8 @@
> > 
> >       hwloc_cpuset_realloc_by_ulongs(res, largest->ulongs_count); /* cannot 
> > reset since the output may also be an input */
> > 
> > -     for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++)
> > -             res->ulongs[i] = set1->ulongs[i] | set2->ulongs[i];
> > -
> > -     val = smallest->infinite ? HWLOC_CPUSUBSET_FULL : 
> > HWLOC_CPUSUBSET_ZERO;
> > -     for(; i<largest->ulongs_count; i++)
> > -             res->ulongs[i] = val | largest->ulongs[i];
> > -
> > -     val |= largest->infinite ? HWLOC_CPUSUBSET_FULL : 
> > HWLOC_CPUSUBSET_ZERO;
> > -     for(; i<res->ulongs_count; i++)
> > -             res->ulongs[i] = val;
> > +     for(i=0; i<res->ulongs_count; i++)
> > +             res->ulongs[i] = HWLOC_CPUSUBSET_READULONG(set1, i) | 
> > HWLOC_CPUSUBSET_READULONG(set2, i);
> > 
> >       res->infinite = set1->infinite || set2->infinite;
> >  }
> > @@ -658,8 +622,6 @@
> >  void hwloc_cpuset_and (struct hwloc_cpuset_s *res, const struct 
> > hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2)
> >  {
> >       const struct hwloc_cpuset_s *largest = set1->ulongs_count > 
> > set2->ulongs_count ? set1 : set2;
> > -     const struct hwloc_cpuset_s *smallest = set1->ulongs_count > 
> > set2->ulongs_count ? set2 : set1;
> > -     unsigned long val;
> >       unsigned i;
> > 
> >       HWLOC__CPUSET_CHECK(res);
> > @@ -668,16 +630,8 @@
> > 
> >       hwloc_cpuset_realloc_by_ulongs(res, largest->ulongs_count); /* cannot 
> > reset since the output may also be an input */
> > 
> > -     for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++)
> > -             res->ulongs[i] = set1->ulongs[i] & set2->ulongs[i];
> > -
> > -     val = smallest->infinite ? HWLOC_CPUSUBSET_FULL : 
> > HWLOC_CPUSUBSET_ZERO;
> > -     for(; i<largest->ulongs_count; i++)
> > -             res->ulongs[i] = val & largest->ulongs[i];
> > -
> > -     val &= largest->infinite ? HWLOC_CPUSUBSET_FULL : 
> > HWLOC_CPUSUBSET_ZERO;
> > -     for(; i<res->ulongs_count; i++)
> > -             res->ulongs[i] = val;
> > +     for(i=0; i<res->ulongs_count; i++)
> > +             res->ulongs[i] = HWLOC_CPUSUBSET_READULONG(set1, i) & 
> > HWLOC_CPUSUBSET_READULONG(set2, i);
> > 
> >       res->infinite = set1->infinite && set2->infinite;
> >  }
> > @@ -685,8 +639,6 @@
> >  void hwloc_cpuset_andnot (struct hwloc_cpuset_s *res, const struct 
> > hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2)
> >  {
> >       const struct hwloc_cpuset_s *largest = set1->ulongs_count > 
> > set2->ulongs_count ? set1 : set2;
> > -     const struct hwloc_cpuset_s *smallest = set1->ulongs_count > 
> > set2->ulongs_count ? set2 : set1;
> > -     unsigned long val;
> >       unsigned i;
> > 
> >       HWLOC__CPUSET_CHECK(res);
> > @@ -695,16 +647,8 @@
> > 
> >       hwloc_cpuset_realloc_by_ulongs(res, largest->ulongs_count); /* cannot 
> > reset since the output may also be an input */
> > 
> > -     for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++)
> > -             res->ulongs[i] = set1->ulongs[i] & ~set2->ulongs[i];
> > -
> > -     val = (!smallest->infinite) != (smallest != set2) ? 
> > HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > -     for(; i<largest->ulongs_count; i++)
> > -             res->ulongs[i] = val & largest->ulongs[i];
> > -
> > -     val &= (!largest->infinite) != (largest != set2) ? 
> > HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > -     for(; i<res->ulongs_count; i++)
> > -             res->ulongs[i] = val;
> > +     for(i=0; i<res->ulongs_count; i++)
> > +             res->ulongs[i] = HWLOC_CPUSUBSET_READULONG(set1, i) & 
> > ~HWLOC_CPUSUBSET_READULONG(set2, i);
> > 
> >       res->infinite = set1->infinite && !set2->infinite;
> >  }
> > @@ -712,8 +656,6 @@
> >  void hwloc_cpuset_xor (struct hwloc_cpuset_s *res, const struct 
> > hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2)
> >  {
> >       const struct hwloc_cpuset_s *largest = set1->ulongs_count > 
> > set2->ulongs_count ? set1 : set2;
> > -     const struct hwloc_cpuset_s *smallest = set1->ulongs_count > 
> > set2->ulongs_count ? set2 : set1;
> > -     unsigned long val;
> >       unsigned i;
> > 
> >       HWLOC__CPUSET_CHECK(res);
> > @@ -722,23 +664,14 @@
> > 
> >       hwloc_cpuset_realloc_by_ulongs(res, largest->ulongs_count); /* cannot 
> > reset since the output may also be an input */
> > 
> > -     for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++)
> > -             res->ulongs[i] = set1->ulongs[i] ^ set2->ulongs[i];
> > -
> > -     val = smallest->infinite ? HWLOC_CPUSUBSET_FULL : 
> > HWLOC_CPUSUBSET_ZERO;
> > -     for(; i<largest->ulongs_count; i++)
> > -             res->ulongs[i] = val ^ largest->ulongs[i];
> > -
> > -     val ^= largest->infinite ? HWLOC_CPUSUBSET_FULL : 
> > HWLOC_CPUSUBSET_ZERO;
> > -     for(; i<res->ulongs_count; i++)
> > -             res->ulongs[i] = val;
> > +     for(i=0; i<res->ulongs_count; i++)
> > +             res->ulongs[i] = HWLOC_CPUSUBSET_READULONG(set1, i) ^ 
> > HWLOC_CPUSUBSET_READULONG(set2, i);
> > 
> >       res->infinite = (!set1->infinite) != (!set2->infinite);
> >  }
> > 
> >  void hwloc_cpuset_not (struct hwloc_cpuset_s *res, const struct 
> > hwloc_cpuset_s *set)
> >  {
> > -     unsigned long val;
> >       unsigned i;
> > 
> >       HWLOC__CPUSET_CHECK(res);
> > @@ -746,12 +679,8 @@
> > 
> >       hwloc_cpuset_realloc_by_ulongs(res, set->ulongs_count); /* cannot 
> > reset since the output may also be an input */
> > 
> > -     for(i=0; i<set->ulongs_count; i++)
> > -             res->ulongs[i] = ~set->ulongs[i];
> > -
> > -     val = set->infinite ? HWLOC_CPUSUBSET_ZERO : HWLOC_CPUSUBSET_FULL;
> > -     for(; i<res->ulongs_count; i++)
> > -             res->ulongs[i] = val;
> > +     for(i=0; i<res->ulongs_count; i++)
> > +             res->ulongs[i] = ~HWLOC_CPUSUBSET_READULONG(set, i);
> > 
> >       res->infinite = !set->infinite;
> >  }
> > @@ -884,7 +813,7 @@
> > 
> >  int hwloc_cpuset_compare(const struct hwloc_cpuset_s * set1, const struct 
> > hwloc_cpuset_s * set2)
> >  {
> > -     unsigned long val;
> > +     const struct hwloc_cpuset_s *largest = set1->ulongs_count > 
> > set2->ulongs_count ? set1 : set2;
> >       int i;
> > 
> >       HWLOC__CPUSET_CHECK(set1);
> > @@ -893,24 +822,12 @@
> >       if ((!set1->infinite) != (!set2->infinite))
> >               return !!set1->infinite - !!set2->infinite;
> > 
> > -     val = set2->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > -     for(i=set1->ulongs_count-1; (unsigned) i>=set2->ulongs_count; i--) {
> > -             if (set1->ulongs[i] == val)
> > -                     continue;
> > -             return set1->ulongs[i] < val ? -1 : 1;
> > -     }
> > -
> > -     val = set1->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO;
> > -     for(i=set2->ulongs_count-1; (unsigned) i>=set1->ulongs_count; i--) {
> > -             if (val == set2->ulongs[i])
> > -                     continue;
> > -             return val < set2->ulongs[i] ? -1 : 1;
> > -     }
> > -
> > -     for(i=(set2->ulongs_count > set1->ulongs_count ? set1->ulongs_count : 
> > set2->ulongs_count)-1; i>=0; i--) {
> > -             if (set1->ulongs[i] == set2->ulongs[i])
> > +     for(i=largest->ulongs_count-1; i>=0; i--) {
> > +             unsigned long val1 = HWLOC_CPUSUBSET_READULONG(set1, i);
> > +             unsigned long val2 = HWLOC_CPUSUBSET_READULONG(set2, i);
> > +             if (val1 == val2)
> >                       continue;
> > -             return set1->ulongs[i] < set2->ulongs[i] ? -1 : 1;
> > +             return val1 < val2 ? -1 : 1;
> >       }
> > 
> >       return 0;
> > _______________________________________________
> > hwloc-svn mailing list
> > hwloc-...@open-mpi.org
> > http://www.open-mpi.org/mailman/listinfo.cgi/hwloc-svn
> >  
> 
> _______________________________________________
> hwloc-devel mailing list
> hwloc-de...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/hwloc-devel
> 


-- 
Jeff Squyres
jsquy...@cisco.com
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/


Reply via email to