On 01/07/2017 12:58 PM, Phil Bouchard wrote:
On 01/07/2017 12:11 PM, Phil Bouchard wrote:
On 01/06/2017 10:37 PM, Phil Bouchard wrote:
On 01/06/2017 08:05 PM, Phil Bouchard wrote:
On 01/06/2017 07:17 PM, Phil Bouchard wrote:

Just to conclude, I did try the attached benchmark, and I get the
following on an x86_64 @ 2.40 GHz:

0: 61331143.40263957 allocations / second
1: 63644162.93924019 allocations / second
2: 177628727.5388474 allocations / second
3: 179850939.5413082 allocations / second
1 / 0: 103.7713621632905% boost
2 / 1: 101.2510431354494% boost

So the fast_pool_allocator is already pretty fast, and I can only get a
1% speed boost by allocating big memory blocks.  So it doesn't look like
there is any way to make the fast_pool_allocator any faster.

Correction: I get an 11% speed boost with the -O3 flag:

0: 60387275.67636769 allocations / second
1: 63133704.55951615 allocations / second
2: 169169529.8609596 allocations / second
3: 188535531.4062488 allocations / second
1 / 0: 104.5480258090584% boost
2 / 1: 111.447688931456% boost

So I am not sure if this is worth the trouble.

Did you know the optimized boost::fast_pool_allocator is about 3 times as
fast as the regular one (305% of its speed)?

0: 61407861.18875794 allocations / second
1: 62565419.96725326 allocations / second
2: 175311265.1512761 allocations / second
3: 187319938.708916 allocations / second
1 / 0: 101.8850335381934% boost
2 / 1: 106.8499155187076% boost
3 / 0: 305.0422781101666% boost

Is Qt using a similar pool allocator?

This means the optimized boost::fast_pool_allocator is about 4.7 times as
fast as the system "operator new" (470% of its speed):

0: 37334468.30116969 allocations / second (operator new)
1: 58554420.18534816 allocations / second (boost::pool_allocator)
2: 60072218.82146717 allocations / second (boost::pool_allocator)
3: 164895686.9884111 allocations / second (boost::fast_pool_allocator)
4: 175697519.1510296 allocations / second (boost::fast_pool_allocator)
2 / 1: 102.5921162421463% boost
4 / 3: 106.550706304027% boost
4 / 0: 470.6040480708413% boost

I just tried the "jemalloc" library and I get a 2576228000% speed boost
compared to operator new:

0: 38816440.15979952 allocations / second
1: 62188319.91413037 allocations / second
2: 63819182.38692677 allocations / second
3: 174727730.5139267 allocations / second
4: 186025745.9632413 allocations / second
5: 1000000000000000 allocations / second
5 / 0: 2576228000% boost
5 / 1: 1608019000% boost

Sorry, false alarm. I think I was misusing jemalloc. Now
boost::fast_pool_allocator is the winner, running at 332% of the speed of
the normal boost::pool_allocator:

0: inf allocations / second (no allocation)
1: 54435751.65974607 allocations / second (jemalloc)
2: 77056206.33802707 allocations / second (boost::pool_allocator)
3: 80152739.05955189 allocations / second (boost::pool_allocator)
4: 236036104.0824805 allocations / second (boost::fast_pool_allocator)
5: 256307733.3169296 allocations / second (boost::fast_pool_allocator)
5 / 2: 332.6243861429787% boost
// Configured with: ./configure --with-jemalloc-prefix=je_
// Compiled with: g++-4.9 pool-benchmark.cpp -o pool-benchmark -lboost_timer -lboost_system -ljemalloc -O3 -std=c++14 -DBOOST_DISABLE_THREADS

#include <cstddef>
#include <iomanip>
#include <iostream>
#include <limits>
#include <new>
#include <boost/timer.hpp>
#include <boost/pool/pool_alloc.hpp>
#include "jemalloc/jemalloc.h"

using namespace std;
using namespace boost;

// Minimal standard-library-style allocator that obtains storage from
// jemalloc through its prefixed entry points je_malloc / je_free (the
// library is configured with --with-jemalloc-prefix=je_).
//
// The class holds no data members, so every instance is interchangeable
// with every other one; copying or rebinding it is a no-op.
template <typename T>
    class je_allocator
    {
    public:
        using value_type      = T;
        using pointer         = T*;
        using const_pointer   = const T*;
        using reference       = T&;
        using const_reference = const T&;
        using size_type       = std::size_t;
        using difference_type = std::ptrdiff_t;

        // Yields the same allocator for a different element type.
        template <class U>
            struct rebind
            {
                using other = je_allocator<U>;
            };

        // Returns the address of `value`.
        pointer address (reference value) const
        {
            return &value;
        }
        const_pointer address (const_reference value) const
        {
            return &value;
        }

        // All constructors and the destructor do nothing: there is no state.
        je_allocator() noexcept
        {
        }

        je_allocator(const je_allocator&) noexcept
        {
        }

        template <class U>
            je_allocator (const je_allocator<U>&) noexcept
            {
            }

        ~je_allocator() noexcept
        {
        }

        // Largest element count whose byte size still fits in std::size_t.
        size_type max_size () const noexcept
        {
            return std::numeric_limits<std::size_t>::max() / sizeof(T);
        }

        // Allocates uninitialized storage for `num` objects of type T.
        // The second (hint) argument is ignored.
        // Throws std::bad_alloc when the byte count would overflow or when
        // je_malloc returns a null pointer.
        pointer allocate (size_type num, const void* = nullptr)
        {
            // je_malloc takes a size in BYTES, so the element count has to
            // be scaled by sizeof(T); guard the multiplication first.
            if (num > max_size())
                throw std::bad_alloc();

            void* const raw = je_malloc(num * sizeof(T));

            if (raw == nullptr)
                throw std::bad_alloc();

            return static_cast<pointer>(raw);
        }

        // Copy-constructs a T from `value` in the storage at `p`.
        void construct (pointer p, const T& value)
        {
            new (static_cast<void*>(p)) T(value);
        }

        // Runs the destructor of the object at `p` without freeing storage.
        void destroy (pointer p)
        {
            p->~T();
        }

        // Returns storage previously obtained from allocate() to jemalloc.
        // The element count is not needed by je_free and is ignored.
        void deallocate (pointer p, size_type /*num*/)
        {
            je_free(p);
        }
    };

// Equality for je_allocator: always true, whatever the element types,
// because the two operands are never inspected.
template <typename Lhs, typename Rhs>
    bool operator== (const je_allocator<Lhs>& /*lhs*/, const je_allocator<Rhs>& /*rhs*/) noexcept
    {
        return true;
    }

// Inequality for je_allocator: the negation of operator==, which is
// unconditionally true, so this is unconditionally false.
template <typename Lhs, typename Rhs>
    bool operator!= (const je_allocator<Lhs>& lhs, const je_allocator<Rhs>& rhs) noexcept
    {
        return !(lhs == rhs);
    }

   

int main()
{
    double speed[6];
    
    static long const n = 100000000;
    
    {
        timer t;
        
        for (int i = 0; i < n; ++ i)
            ;
        
        speed[0] = n / t.elapsed();
    }

    {
        je_allocator<int> p;
        timer t;
        
        for (int i = 0; i < n; ++ i)
            p.allocate(1);
        
        speed[1] = n / t.elapsed();
    }

    {
        pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex> p;
        timer t;
        
        for (int i = 0; i < n; ++ i)
            p.allocate(1);
        
        speed[2] = n / t.elapsed();
    }

    {
        pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex, n> p;
        timer t;
        
        for (int i = 0; i < n; ++ i)
            p.allocate(1);
        
        speed[3] = n / t.elapsed();
    }

    {
        fast_pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex> p;
        timer t;
        
        for (int i = 0; i < n; ++ i)
            p.allocate(1);
        
        speed[4] = n / t.elapsed();
    }

    {
        fast_pool_allocator<int, default_user_allocator_new_delete, details::pool::default_mutex, n> p;
        timer t;
        
        for (int i = 0; i < n; ++ i)
            p.allocate(1);
        
        speed[5] = n / t.elapsed();
    }
    
    cout << setprecision(numeric_limits<double>::digits10 + 1);
    
    for (int i = 0; i < 6; ++ i)
        cout << i << ": " << speed[i] << " allocations / second" << endl;

    cout << 5 << " / " << 2 << ": " << speed[5] / speed[2] * 100 << "% boost" << endl;

    return 0;
}
_______________________________________________
Development mailing list
[email protected]
http://lists.qt-project.org/mailman/listinfo/development

Reply via email to