performance measure.hpp, NONE, 1.1 sequence_efficiency.cpp, NONE, 1.1 timings.txt, NONE, 1.1 Jamfile.v2, 1.1, 1.2

joel de guzman Sat, 18 Nov 2006 21:15:37 -0800

Update of /cvsroot/boost/boost/libs/fusion/example/performance
In directory sc8-pr-cvs3.sourceforge.net:/tmp/cvs-serv10652


Modified Files:
        Jamfile.v2 
Added Files:
        measure.hpp sequence_efficiency.cpp timings.txt 
Log Message:
sequence performance tests

--- NEW FILE: measure.hpp ---
// Copyright David Abrahams, Matthias Troyer, Michael Gauckler
// 2005. Distributed under the Boost Software License, Version
// 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

#include <boost/timer.hpp>

namespace test
{
  // This value is required to ensure that a smart compiler's dead
  // code elimination doesn't optimize away anything we're testing.
  // We'll use it to compute the return code of the executable to make
  // sure it's needed.
  int live_code;

  // Call objects of the given Accumulator type repeatedly with x as
  // an argument.
  template <class Accumulator, class Arg>
  void hammer(Arg const& x, long const repeats)
  {
      // Strategy: because the sum in an accumulator after each call
      // depends on the previous value of the sum, the CPU's pipeline
      // might be stalled while waiting for the previous addition to
      // complete.  Therefore, we allocate an array of accumulators,
      // and update them in sequence, so that there's no dependency
      // between adjacent addition operations.
      //
      // Additionally, if there were only one accumulator, the
      // compiler or CPU might decide to update the value in a
      // register rather that writing it back to memory.  we want each
      // operation to at least update the L1 cache.  *** Note: This
      // concern is specific to the particular application at which
      // we're targeting the test. ***

      // This has to be at least as large as the number of
      // simultaneous accumulations that can be executing in the
      // compiler pipeline.  A safe number here is larger than the
      // machine's maximum pipeline depth. If you want to test the L2
      // or L3 cache, or main memory, you can increase the size of
      // this array.  1024 is an upper limit on the pipeline depth of
      // current vector machines.
      const std::size_t number_of_accumulators = 1024;
      live_code = 0; // reset to zero

      Accumulator a[number_of_accumulators];
      
      for (long iteration = 0; iteration < repeats; ++iteration)
      {
          for (Accumulator* ap = a;  ap < a + number_of_accumulators; ++ap)
          {
              (*ap)(x);
          }
      }

      // Accumulate all the partial sums to avoid dead code
      // elimination.
      for (Accumulator* ap = a;  ap < a + number_of_accumulators; ++ap)
      {
          live_code += ap->sum;
      }
  }

  // Measure the time required to hammer accumulators of the given
  // type with the argument x.
  template <class Accumulator, class T>
  double measure(T const& x, long const repeats)
  {
      // Hammer accumulators a couple of times to ensure the
      // instruction cache is full of our test code, and that we don't
      // measure the cost of a page fault for accessing the data page
      // containing the memory where the accumulators will be
      // allocated
      hammer<Accumulator>(x, repeats);
      hammer<Accumulator>(x, repeats);

      // Now start a timer
      boost::timer time;
      hammer<Accumulator>(x, repeats);  // This time, we'll measure
      return time.elapsed() / repeats;  // return the time of one iteration
  }
}

--- NEW FILE: sequence_efficiency.cpp ---
/*=============================================================================
    Copyright (c) 2001-2006 Joel de Guzman

    Use, modification and distribution is subject to the Boost Software
    License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
    http://www.boost.org/LICENSE_1_0.txt)
==============================================================================*/
#include "measure.hpp"

#define FUSION_MAX_LIST_SIZE 30
#define FUSION_MAX_VECTOR_SIZE 30

#include <boost/fusion/algorithm/iteration/accumulate.hpp>
#include <boost/fusion/sequence/container/vector.hpp>
#include <boost/fusion/sequence/container/list.hpp>

#include <algorithm>
#include <numeric>
#include <functional>
#include <iostream>
#include <cmath>
#include <limits>

#ifdef _MSC_VER
// inline aggressively
# pragma inline_recursion(on) // turn on inline recursion
# pragma inline_depth(255)    // max inline depth
#endif

namespace
{
    struct poly_add
    {
        template<typename Lhs, typename Rhs>
        struct result
        {
            typedef Lhs type;
        };

        template<typename Lhs, typename Rhs>
        Lhs operator()(const Lhs& lhs, const Rhs& rhs) const
        {
            return lhs + rhs;
        }
    };

    // Our Accumulator function
    template <typename T>
    struct accumulator
    {
        accumulator()
            : sum()
        {}
        
        template <typename Sequence>
        void operator()(Sequence const& seq)
        {
            this->sum += boost::fusion::accumulate(seq, 0, poly_add());
        }
        
        T sum;
    };
}

int main()
{
    using namespace test;
    using namespace boost::fusion;

    vector<
        int, int, int
    > 
    vsmall(BOOST_PP_ENUM_PARAMS(3,));

    list<
        int, int, int
    > 
    lsmall(BOOST_PP_ENUM_PARAMS(3,));

    vector<
        int, int, int, int, int, int, int, int, int, int
    > 
    vmid(BOOST_PP_ENUM_PARAMS(10,));

    list<
        int, int, int, int, int, int, int, int, int, int
    > 
    lmid(BOOST_PP_ENUM_PARAMS(10,));

    vector<
        int, int, int, int, int, int, int, int, int, int
      , int, int, int, int, int, int, int, int, int, int
      , int, int, int, int, int, int, int, int, int, int
    > 
    vbig(BOOST_PP_ENUM_PARAMS(30,));

    list<
        int, int, int, int, int, int, int, int, int, int
      , int, int, int, int, int, int, int, int, int, int
      , int, int, int, int, int, int, int, int, int, int
    > 
    lbig(BOOST_PP_ENUM_PARAMS(30,));

    // first decide how many repetitions to measure
    long repeats = 100;
    double measured = 0;
    while (measured < 1.0 && repeats <= 10000000)
    {
        repeats *= 10;
        
        boost::timer time;

        hammer<accumulator<int> >(vsmall, repeats);
        hammer<accumulator<int> >(lsmall, repeats);
        hammer<accumulator<int> >(vmid, repeats);
        hammer<accumulator<int> >(lmid, repeats);
        hammer<accumulator<int> >(vbig, repeats);
        hammer<accumulator<int> >(lbig, repeats);

        measured = time.elapsed();
    }

    measure<accumulator<int> >(vsmall, 1);
    std::cout 
        << "small vector accumulated result:    " 
        << live_code << std::endl;
    measure<accumulator<int> >(lsmall, 1);
    std::cout 
        << "small list accumulated result:      " 
        << live_code << std::endl;
    measure<accumulator<int> >(vmid, 1);
    std::cout 
        << "medium vector accumulated result:   " 
        << live_code << std::endl;
    measure<accumulator<int> >(lmid, 1);
    std::cout 
        << "medium list accumulated result:     " 
        << live_code << std::endl;
    measure<accumulator<int> >(vbig, 1);
    std::cout 
        << "big vector accumulated result:      " 
        << live_code << std::endl;
    measure<accumulator<int> >(lbig, 1);
    std::cout 
        << "big list accumulated result:        " 
        << live_code << std::endl;

    std::cout.setf(std::ios::scientific);

    std::cout
        << "small vector time:                  "
        << measure<accumulator<int> >(vsmall, repeats)
        << std::endl;
    std::cout
        << "small list time:                    "
        << measure<accumulator<int> >(lsmall, repeats)
        << std::endl;    
    std::cout
        << "medium vector time:                 "
        << measure<accumulator<int> >(vmid, repeats)
        << std::endl;
    std::cout
        << "medium list time:                   "
        << measure<accumulator<int> >(lmid, repeats)
        << std::endl;
    std::cout
        << "big vector time:                    "
        << measure<accumulator<int> >(vbig, repeats)
        << std::endl;
    std::cout
        << "big list time:                      "
        << measure<accumulator<int> >(lbig, repeats)
        << std::endl;

    // This is ultimately responsible for preventing all the test code
    // from being optimized away.  Change this to return 0 and you
    // unplug the whole test's life support system.
    return live_code != 0;
}

--- NEW FILE: timings.txt ---
Timing result for sequence_efficiency.cpp comparing the speed of various
fusion sequences. The test involves accumulating the elements of the
sequence which is primed to have values 0..N (N=size of sequence). Small,
medium and big sequences are tested where:

    small = 3 elements
    medium = 10 elements
    big = 30 elements

Tester: Joel de Guzman. WinXP, P4-3.0GHZ, 2GB RAM

VC7.1 (flags = /MD /O2 /EHsc /GS)

    small vector time:                  1.880000e-006
    small list time:                    2.040000e-006
    medium vector time:                 2.030000e-006
    medium list time:                   3.590000e-006
    big vector time:                    1.880000e-006
    big list time:                      9.070000e-006

VC8.0 (flags = /MD /O2 /EHsc /GS)

    small vector time:                  1.880000e-006
    small list time:                    2.030000e-006
    medium vector time:                 2.030000e-006
    medium list time:                   3.750000e-006
    big vector time:                    1.880000e-006
    big list time:                      9.380000e-006

G++ 3.4 (flags = -ftemplate-depth-128  -funroll-loops -O3 -finline-functions 
-Wno-inline -Wall)

    small vector time:                  2.500000e-05
    small list time:                    2.500000e-05
    medium vector time:                 7.970000e-05
    medium list time:                   7.970000e-05
    big vector time:                    2.516000e-04
    big list time:                      2.485000e-04

Intel 9.1 (flags = /MD /O2 /EHsc /GS)

    small vector time:                  1.141000e-006
    small list time:                    1.156000e-006
    medium vector time:                 1.156000e-006
    medium list time:                   1.156000e-006
    big vector time:                    1.171000e-006
    big list time:                      1.156000e-006




Index: Jamfile.v2
===================================================================
RCS file: /cvsroot/boost/boost/libs/fusion/example/performance/Jamfile.v2,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -d -r1.1 -r1.2
--- Jamfile.v2  10 Sep 2006 16:15:37 -0000      1.1
+++ Jamfile.v2  19 Nov 2006 05:15:31 -0000      1.2
@@ -13,3 +13,6 @@
 exe inner_product : inner_product.cpp ;
 
 exe inner_product2 : inner_product2.cpp ;
+
+exe sequence_efficiency : sequence_efficiency.cpp ;
+


-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys - and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
Boost-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/boost-cvs

[Boost-cvs] CVS: boost/libs/fusion/example/performance measure.hpp, NONE, 1.1 sequence_efficiency.cpp, NONE, 1.1 timings.txt, NONE, 1.1 Jamfile.v2, 1.1, 1.2

Reply via email to