On Mon, Mar 5, 2012 at 11:26 PM, Pierre Abbat <p...@phma.optus.nu> wrote:
> I'm profiling a program so that I can optimize it and hopefully get it to run 
> in real time. Here's
> the profile of the program running under Linux:
>
> Flat profile:
>
> Each sample counts as 0.01 seconds.
>  %   cumulative   self              self     total
>  time   seconds   seconds    calls   s/call   s/call  name
>  65.94      7.59     7.59   613648     0.00     0.00  
> tonegenerator::fwaves(int)
>  11.29      8.89     1.30   613648     0.00     0.00
> __gnu_cxx::__enable_if<std::__is_scalar<float>::__value, float*>::__type 
> std::__fill_n_a<float*,
> unsigned int, float>(float*, unsigned int, float const&)
>  5.39      9.51     0.62 168385808     0.00     0.00  std::vector<float, 
> std::allocator<float>
>>::operator[](unsigned int)
>  2.78      9.83     0.32   991456     0.00     0.00  std::_Rb_tree<double, 
> std::pair<double const,
> errec>, std::_Select1st<std::pair<double const, errec> >, std::less<double>,
> std::allocator<std::pair<double const, errec> >
>>::_M_lower_bound(std::_Rb_tree_node<std::pair<double const, errec> > const*,
> std::_Rb_tree_node<std::pair<double const, errec> > const*, double const&) 
> const
>  2.17     10.08     0.25 15606822     0.00     0.00  
> std::less<double>::operator()(double const&,
> double const&) const
>  1.26     10.22     0.14   613648     0.00     0.00  mastertimer::skip()
>  1.04     10.35     0.12 14558791     0.00     0.00  std::_Rb_tree<double, 
> std::pair<double
> const, errec>, std::_Select1st<std::pair<double const, errec> >, 
> std::less<double>,
> std::allocator<std::pair<double const, errec> >
>>::_S_key(std::_Rb_tree_node<std::pair<double const, errec> > const*)
>  0.87     10.45     0.10                             main
>  0.52     10.51     0.06   613648     0.00     0.00  tonegenerator::unfwave()
>  0.48     10.56     0.06  8000250     0.00     0.00  
> std::_Rb_tree_iterator<std::pair<double
> const, errec> >::operator->() const
>  0.43     10.61     0.05   999513     0.00     0.00  std::map<double, errec, 
> std::less<double>,
> std::allocator<std::pair<double const, errec> > >::end()
>  0.43     10.66     0.05   991456     0.00     0.00  std::_Rb_tree<double, 
> std::pair<double const,
> errec>, std::_Select1st<std::pair<double const, errec> >, std::less<double>,
> std::allocator<std::pair<double const, errec> > >::find(double const&) const
>  0.43     10.71     0.05   205449     0.00     0.00  downspike()
>  0.35     10.75     0.04  1227298     0.00     0.00  std::vector<float, 
> std::allocator<float>
>>::~vector()
>  0.35     10.79     0.04   613648     0.00     0.00  std::vector<float, 
> std::allocator<float>
>>::vector(std::vector<float, std::allocator<float> > const&)
>  0.30     10.82     0.04  1840944     0.00     0.00  parity(unsigned int)
>  0.26     10.86     0.03 14558791     0.00     0.00  std::_Rb_tree<double, 
> std::pair<double
> const, errec>, std::_Select1st<std::pair<double const, errec> >, 
> std::less<double>,
> std::allocator<std::pair<double const, errec> >
>>::_S_value(std::_Rb_tree_node<std::pair<double const, errec> > const*)
>  0.26     10.88     0.03  2011200     0.00     0.00  std::_Rb_tree<double, 
> std::pair<double const,
> errec>, std::_Select1st<std::pair<double const, errec> >, std::less<double>,
> std::allocator<std::pair<double const, errec> > >::end() const
>  0.26     10.91     0.03  1970656     0.00     0.00  
> std::_Rb_tree_iterator<std::pair<double
> const, errec> >::operator!=(std::_Rb_tree_iterator<std::pair<double const, 
> errec> > const&)
> const
>  0.26     10.95     0.03  1524715     0.00     0.00  float* 
> std::__copy_move<false, true,
> std::random_access_iterator_tag>::__copy_m<float>(float const*, float const*, 
> float*)
>  0.26     10.97     0.03   613648     0.00     0.00  std::vector<float, 
> std::allocator<float>
>>::operator=(std::vector<float, std::allocator<float> > const&)
>  0.26     11.01     0.03    22128     0.00     0.00  compdiv(double)
>  0.22     11.03     0.03  7046596     0.00     0.00  std::_Rb_tree<double, 
> std::pair<double const,
> errec>, std::_Select1st<std::pair<double const, errec> >, std::less<double>,
> std::allocator<std::pair<double const, errec> > 
> >::_S_left(std::_Rb_tree_node_base const*)
>  0.22     11.05     0.03  5504021     0.00     0.00  std::vector<float, 
> std::allocator<float>
>>::size() const
>  0.17     11.07     0.02  8056834     0.00     0.00  std::pair<double const, 
> errec>*
> std::__addressof<std::pair<double const, errec> >(std::pair<double const, 
> errec>&)
>  0.17     11.10     0.02  1859754     0.00     0.00  
> __gnu_cxx::__normal_iterator<float const*,
> std::vector<float, std::allocator<float> > >::base() const
>  0.17     11.12     0.02  1859754     0.00     0.00
> std::_Niter_base<__gnu_cxx::__normal_iterator<float const*, std::vector<float,
> std::allocator<float> > > >::iterator_type
> std::__niter_base<__gnu_cxx::__normal_iterator<float const*, 
> std::vector<float,
> std::allocator<float> > > >(__gnu_cxx::__normal_iterator<float const*, 
> std::vector<float,
> std::allocator<float> > >)
>  0.17     11.13     0.02  1048032     0.00     0.00  std::vector<double, 
> std::allocator<double>
>>::operator[](unsigned int)
>  0.17     11.15     0.02   613648     0.00     0.00  std::_Vector_base<float, 
> std::allocator<float>
>>::_M_get_Tp_allocator() const
>  0.17     11.18     0.02        1     0.02    10.22  cscale()
>
> Here's the profile under DragonFly:
>
> Flat profile:
>
> Each sample counts as 0.0078125 seconds.
>  %   cumulative   self              self     total
>  time   seconds   seconds    calls   s/call   s/call  name
>  45.55    122.17   122.17  1255310     0.00     0.00  _slaballoc
>  25.03    189.30    67.13   613648     0.00     0.00  
> tonegenerator::fwaves(int)
>  22.74    250.29    60.99      415     0.15     0.15  write
>  4.19    261.53    11.23                             .mcount
>  1.11    264.50     2.98  2752043     0.00     0.00  memmove
>  0.37    265.50     1.00   613648     0.00     0.00
> __gnu_cxx::__enable_if<std::__is_scalar<float>::__value, float*>::__type 
> std::__fill_n_a<float*,
> unsigned int, float>(float*, unsigned int, float const&)
>  0.22    266.09     0.59 168385808     0.00     0.00  std::vector<float, 
> std::allocator<float>
>>::operator[](unsigned int)
>  0.10    266.36     0.27   946349     0.00     0.00  std::_Rb_tree<double, 
> std::pair<double const,
> errec>, std::_Select1st<std::pair<double const, errec> >, std::less<double>,
> std::allocator<std::pair<double const, errec> >
>>::_M_lower_bound(std::_Rb_tree_node<std::pair<double const, errec> > const*,
> std::_Rb_tree_node<std::pair<double const, errec> > const*, double const&) 
> const
>  0.06    266.53     0.17 14880846     0.00     0.00  
> std::less<double>::operator()(double
> const&, double const&) const
>  0.05    266.67     0.14  1175511     0.00     0.00  exp
>  0.05    266.79     0.12   613648     0.00     0.00  mastertimer::skip()
>  0.04    266.89     0.10        1     0.10   256.25  main
>  0.03    266.99     0.09  1227296     0.00     0.00  hypot
>  0.02    267.05     0.06  1840944     0.00     0.00  parity(unsigned int)
>  0.02    267.11     0.06   613648     0.00     0.00  tonegenerator::unfwave()
>  0.02    267.17     0.06   277009     0.00     0.00
> __gnu_cxx::__enable_if<std::__is_integer<int>::__value, double>::__type 
> std::log<int>(int)
>  0.02    267.23     0.05  1524715     0.00     0.00  float* 
> std::__copy_move<false, true,
> std::random_access_iterator_tag>::__copy_m<float>(float const*, float const*, 
> float*)
>  0.02    267.28     0.05 13879461     0.00     0.00  std::_Rb_tree<double, 
> std::pair<double
> const, errec>, std::_Select1st<std::pair<double const, errec> >, 
> std::less<double>,
> std::allocator<std::pair<double const, errec> >
>>::_S_key(std::_Rb_tree_node<std::pair<double const, errec> > const*)
>  0.02    267.32     0.05   554079     0.00     0.00  log
>  0.02    267.37     0.05   205449     0.00     0.00  downspike()
>  0.02    267.42     0.05        1     0.05   241.22  cscale()
>
> I replaced downhann(), which was taking even more time than fwaves(int), with 
> downspike(),
> thus making the program much faster. The next function to optimize is 
> fwaves(int), which I'm
> working on. But in DFly, _slaballoc takes more time than fwaves(int). What is 
> it, and how can I
> optimize it?
>
> Pierre

It is the allocation path in our libc slab allocator, specifically nmalloc.

I'd be fascinated to see the allocation pattern this application
produces that cause that much time to be spent in the allocator.
nmalloc is a segregated-storage allocator, patterns should have a lot
of difficulty causing worst-case behavior.

-- vs;
http://ops101.org/4k/

Reply via email to