Bring it on! :-) On May 19, 2010, at 11:07 AM, Brice Goglin wrote:
> This branch is pretty much ready to merge in trunk for 1.1. > If somebody doesn't like it, please complain! > I'll merge by the end of the week otherwise. > > Brice > > > > > On 19/05/2010 17:01, bgog...@osl.iu.edu wrote: > > Author: bgoglin > > Date: 2010-05-19 11:01:42 EDT (Wed, 19 May 2010) > > New Revision: 2107 > > URL: https://svn.open-mpi.org/trac/hwloc/changeset/2107 > > > > Log: > > Deoptimize many cpuset routines by using HWLOC_CPUSUBSET_READULONG() > > all the time instead of manually handling the cases where a ulong index > > is valid in one of the input cpuset and/or the other. > > > > The performance gain wasn't so huge (0-20%), and the code was waaaaay > > harder to read/maintain. And things should be much easier to deal > > with when we'll add sparse cpuset support. > > Text files modified: > > branches/dyncpusets/src/cpuset.c | 127 > > ++++++--------------------------------- > > 1 files changed, 22 insertions(+), 105 deletions(-) > > > > Modified: branches/dyncpusets/src/cpuset.c > > ============================================================================== > > --- branches/dyncpusets/src/cpuset.c (original) > > +++ branches/dyncpusets/src/cpuset.c 2010-05-19 11:01:42 EDT (Wed, 19 May > > 2010) > > @@ -553,24 +553,13 @@ > > > > int hwloc_cpuset_isequal (const struct hwloc_cpuset_s *set1, const struct > > hwloc_cpuset_s *set2) > > { > > - unsigned long val; > > unsigned i; > > > > HWLOC__CPUSET_CHECK(set1); > > HWLOC__CPUSET_CHECK(set2); > > > > - for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++) > > - if (set1->ulongs[i] != set2->ulongs[i]) > > - return 0; > > - > > - val = set1->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO; > > - for(; i<set2->ulongs_count; i++) > > - if (set2->ulongs[i] != val) > > - return 0; > > - > > - val = set2->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO; > > - for(; i<set1->ulongs_count; i++) > > - if (set1->ulongs[i] != val) > > + for(i=0; i<set1->ulongs_count || i<set2->ulongs_count; i++) > > + if (HWLOC_CPUSUBSET_READULONG(set1, i) != > > HWLOC_CPUSUBSET_READULONG(set2, i)) > > return 0; > > > > if (set1->infinite != set2->infinite) > > @@ -581,26 +570,15 @@ > > > > int hwloc_cpuset_intersects (const struct hwloc_cpuset_s *set1, const > > struct hwloc_cpuset_s *set2) > > { > > - unsigned long val; > > unsigned i; > > > > HWLOC__CPUSET_CHECK(set1); > > HWLOC__CPUSET_CHECK(set2); > > > > - for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++) > > - if ((set1->ulongs[i] & set2->ulongs[i]) != > > HWLOC_CPUSUBSET_ZERO) > > + for(i=0; i<set1->ulongs_count || i<set2->ulongs_count; i++) > > + if ((HWLOC_CPUSUBSET_READULONG(set1, i) & > > HWLOC_CPUSUBSET_READULONG(set2, i)) != HWLOC_CPUSUBSET_ZERO) > > return 1; > > > > - val = set1->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO; > > - for(; i<set2->ulongs_count; i++) > > - if ((set2->ulongs[i] & val) != HWLOC_CPUSUBSET_ZERO) > > - return 0; > > - > > - val = set2->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO; > > - for(; i<set1->ulongs_count; i++) > > - if ((set1->ulongs[i] & val) != HWLOC_CPUSUBSET_ZERO) > > - return 0; > > - > > if (set1->infinite && set2->infinite) > > return 0; > > > > @@ -614,12 +592,8 @@ > > HWLOC__CPUSET_CHECK(sub_set); > > HWLOC__CPUSET_CHECK(super_set); > > > > - for(i=0; i<sub_set->ulongs_count && i<super_set->ulongs_count; i++) > > - if (super_set->ulongs[i] != (super_set->ulongs[i] | > > sub_set->ulongs[i])) > > - return 0; > > - > > - for(; i<sub_set->ulongs_count; i++) > > - if (sub_set->ulongs[i] != HWLOC_CPUSUBSET_ZERO && > > !super_set->infinite) > > + for(i=0; i<sub_set->ulongs_count; i++) > > + if (HWLOC_CPUSUBSET_READULONG(super_set, i) != > > (HWLOC_CPUSUBSET_READULONG(super_set, i) | > > HWLOC_CPUSUBSET_READULONG(sub_set, i))) > > return 0; > > > > if (sub_set->infinite && !super_set->infinite) > > @@ -631,8 +605,6 @@ > > void hwloc_cpuset_or (struct hwloc_cpuset_s *res, const struct > > hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2) > > { > > const struct hwloc_cpuset_s *largest = set1->ulongs_count > > > set2->ulongs_count ? set1 : set2; > > - const struct hwloc_cpuset_s *smallest = set1->ulongs_count > > > set2->ulongs_count ? set2 : set1; > > - unsigned long val; > > unsigned i; > > > > HWLOC__CPUSET_CHECK(res); > > @@ -641,16 +613,8 @@ > > > > hwloc_cpuset_realloc_by_ulongs(res, largest->ulongs_count); /* cannot > > reset since the output may also be an input */ > > > > - for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++) > > - res->ulongs[i] = set1->ulongs[i] | set2->ulongs[i]; > > - > > - val = smallest->infinite ? HWLOC_CPUSUBSET_FULL : > > HWLOC_CPUSUBSET_ZERO; > > - for(; i<largest->ulongs_count; i++) > > - res->ulongs[i] = val | largest->ulongs[i]; > > - > > - val |= largest->infinite ? HWLOC_CPUSUBSET_FULL : > > HWLOC_CPUSUBSET_ZERO; > > - for(; i<res->ulongs_count; i++) > > - res->ulongs[i] = val; > > + for(i=0; i<res->ulongs_count; i++) > > + res->ulongs[i] = HWLOC_CPUSUBSET_READULONG(set1, i) | > > HWLOC_CPUSUBSET_READULONG(set2, i); > > > > res->infinite = set1->infinite || set2->infinite; > > } > > @@ -658,8 +622,6 @@ > > void hwloc_cpuset_and (struct hwloc_cpuset_s *res, const struct > > hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2) > > { > > const struct hwloc_cpuset_s *largest = set1->ulongs_count > > > set2->ulongs_count ? set1 : set2; > > - const struct hwloc_cpuset_s *smallest = set1->ulongs_count > > > set2->ulongs_count ? set2 : set1; > > - unsigned long val; > > unsigned i; > > > > HWLOC__CPUSET_CHECK(res); > > @@ -668,16 +630,8 @@ > > > > hwloc_cpuset_realloc_by_ulongs(res, largest->ulongs_count); /* cannot > > reset since the output may also be an input */ > > > > - for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++) > > - res->ulongs[i] = set1->ulongs[i] & set2->ulongs[i]; > > - > > - val = smallest->infinite ? HWLOC_CPUSUBSET_FULL : > > HWLOC_CPUSUBSET_ZERO; > > - for(; i<largest->ulongs_count; i++) > > - res->ulongs[i] = val & largest->ulongs[i]; > > - > > - val &= largest->infinite ? HWLOC_CPUSUBSET_FULL : > > HWLOC_CPUSUBSET_ZERO; > > - for(; i<res->ulongs_count; i++) > > - res->ulongs[i] = val; > > + for(i=0; i<res->ulongs_count; i++) > > + res->ulongs[i] = HWLOC_CPUSUBSET_READULONG(set1, i) & > > HWLOC_CPUSUBSET_READULONG(set2, i); > > > > res->infinite = set1->infinite && set2->infinite; > > } > > @@ -685,8 +639,6 @@ > > void hwloc_cpuset_andnot (struct hwloc_cpuset_s *res, const struct > > hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2) > > { > > const struct hwloc_cpuset_s *largest = set1->ulongs_count > > > set2->ulongs_count ? set1 : set2; > > - const struct hwloc_cpuset_s *smallest = set1->ulongs_count > > > set2->ulongs_count ? set2 : set1; > > - unsigned long val; > > unsigned i; > > > > HWLOC__CPUSET_CHECK(res); > > @@ -695,16 +647,8 @@ > > > > hwloc_cpuset_realloc_by_ulongs(res, largest->ulongs_count); /* cannot > > reset since the output may also be an input */ > > > > - for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++) > > - res->ulongs[i] = set1->ulongs[i] & ~set2->ulongs[i]; > > - > > - val = (!smallest->infinite) != (smallest != set2) ? > > HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO; > > - for(; i<largest->ulongs_count; i++) > > - res->ulongs[i] = val & largest->ulongs[i]; > > - > > - val &= (!largest->infinite) != (largest != set2) ? > > HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO; > > - for(; i<res->ulongs_count; i++) > > - res->ulongs[i] = val; > > + for(i=0; i<res->ulongs_count; i++) > > + res->ulongs[i] = HWLOC_CPUSUBSET_READULONG(set1, i) & > > ~HWLOC_CPUSUBSET_READULONG(set2, i); > > > > res->infinite = set1->infinite && !set2->infinite; > > } > > @@ -712,8 +656,6 @@ > > void hwloc_cpuset_xor (struct hwloc_cpuset_s *res, const struct > > hwloc_cpuset_s *set1, const struct hwloc_cpuset_s *set2) > > { > > const struct hwloc_cpuset_s *largest = set1->ulongs_count > > > set2->ulongs_count ? set1 : set2; > > - const struct hwloc_cpuset_s *smallest = set1->ulongs_count > > > set2->ulongs_count ? set2 : set1; > > - unsigned long val; > > unsigned i; > > > > HWLOC__CPUSET_CHECK(res); > > @@ -722,23 +664,14 @@ > > > > hwloc_cpuset_realloc_by_ulongs(res, largest->ulongs_count); /* cannot > > reset since the output may also be an input */ > > > > - for(i=0; i<set1->ulongs_count && i<set2->ulongs_count; i++) > > - res->ulongs[i] = set1->ulongs[i] ^ set2->ulongs[i]; > > - > > - val = smallest->infinite ? HWLOC_CPUSUBSET_FULL : > > HWLOC_CPUSUBSET_ZERO; > > - for(; i<largest->ulongs_count; i++) > > - res->ulongs[i] = val ^ largest->ulongs[i]; > > - > > - val ^= largest->infinite ? HWLOC_CPUSUBSET_FULL : > > HWLOC_CPUSUBSET_ZERO; > > - for(; i<res->ulongs_count; i++) > > - res->ulongs[i] = val; > > + for(i=0; i<res->ulongs_count; i++) > > + res->ulongs[i] = HWLOC_CPUSUBSET_READULONG(set1, i) ^ > > HWLOC_CPUSUBSET_READULONG(set2, i); > > > > res->infinite = (!set1->infinite) != (!set2->infinite); > > } > > > > void hwloc_cpuset_not (struct hwloc_cpuset_s *res, const struct > > hwloc_cpuset_s *set) > > { > > - unsigned long val; > > unsigned i; > > > > HWLOC__CPUSET_CHECK(res); > > @@ -746,12 +679,8 @@ > > > > hwloc_cpuset_realloc_by_ulongs(res, set->ulongs_count); /* cannot > > reset since the output may also be an input */ > > > > - for(i=0; i<set->ulongs_count; i++) > > - res->ulongs[i] = ~set->ulongs[i]; > > - > > - val = set->infinite ? HWLOC_CPUSUBSET_ZERO : HWLOC_CPUSUBSET_FULL; > > - for(; i<res->ulongs_count; i++) > > - res->ulongs[i] = val; > > + for(i=0; i<res->ulongs_count; i++) > > + res->ulongs[i] = ~HWLOC_CPUSUBSET_READULONG(set, i); > > > > res->infinite = !set->infinite; > > } > > @@ -884,7 +813,7 @@ > > > > int hwloc_cpuset_compare(const struct hwloc_cpuset_s * set1, const struct > > hwloc_cpuset_s * set2) > > { > > - unsigned long val; > > + const struct hwloc_cpuset_s *largest = set1->ulongs_count > > > set2->ulongs_count ? set1 : set2; > > int i; > > > > HWLOC__CPUSET_CHECK(set1); > > @@ -893,24 +822,12 @@ > > if ((!set1->infinite) != (!set2->infinite)) > > return !!set1->infinite - !!set2->infinite; > > > > - val = set2->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO; > > - for(i=set1->ulongs_count-1; (unsigned) i>=set2->ulongs_count; i--) { > > - if (set1->ulongs[i] == val) > > - continue; > > - return set1->ulongs[i] < val ? -1 : 1; > > - } > > - > > - val = set1->infinite ? HWLOC_CPUSUBSET_FULL : HWLOC_CPUSUBSET_ZERO; > > - for(i=set2->ulongs_count-1; (unsigned) i>=set1->ulongs_count; i--) { > > - if (val == set2->ulongs[i]) > > - continue; > > - return val < set2->ulongs[i] ? -1 : 1; > > - } > > - > > - for(i=(set2->ulongs_count > set1->ulongs_count ? set1->ulongs_count : > > set2->ulongs_count)-1; i>=0; i--) { > > - if (set1->ulongs[i] == set2->ulongs[i]) > > + for(i=largest->ulongs_count-1; i>=0; i--) { > > + unsigned long val1 = HWLOC_CPUSUBSET_READULONG(set1, i); > > + unsigned long val2 = HWLOC_CPUSUBSET_READULONG(set2, i); > > + if (val1 == val2) > > continue; > > - return set1->ulongs[i] < set2->ulongs[i] ? -1 : 1; > > + return val1 < val2 ? -1 : 1; > > } > > > > return 0; > > _______________________________________________ > > hwloc-svn mailing list > > hwloc-...@open-mpi.org > > http://www.open-mpi.org/mailman/listinfo.cgi/hwloc-svn > > > > _______________________________________________ > hwloc-devel mailing list > hwloc-de...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/hwloc-devel > -- Jeff Squyres jsquy...@cisco.com For corporate legal information go to: http://www.cisco.com/web/about/doing_business/legal/cri/