Le mardi 22 août 2006 12:55, Mc Osten a écrit :
> In fact Python here is faster. Suppose it has a really optimized set
> class...
Maybe I'm missing something but the posted c++codes are not equivalent IMO to
what python is doing. I discarded the "slow" version, and tried to get the
equivalent in c++ of :
"""
#!/usr/bin/env python
size = 1000000
def f():
a = []
for i in range(size):
a.append('What do you know')
a.append('so long...')
a.append('chicken crosses road')
a.append('fool')
b = set(a)
for s in b:
print s
import time
from time import clock
f_start = clock()
f()
f_end = clock()
print "Elapsed: %f seconds" % (f_end - f_start)
"""
I came at first with the following, which is still slower than the python
version :
"""
void print_occurence_of_unique_strings(){
vector<string> a;
const string& s1 = "What do you know?" ;
const string& s2 = "so long..." ;
const string& s3 = "chicken crosses road";
const string& s4 = "fool" ;
for (long int i=0; i<SIZE ; ++i){
a.push_back(s1);
a.push_back(s2);
a.push_back(s3);
a.push_back(s4);
}
set<string> b(a.begin(), a.end());
copy(b.begin(), b.end(),
ostream_iterator<string>(cout, "\n"));
}
"""
Here, all strings, while passed by reference to the vector, are copied one by
one.
Then, I tried this, it just overcome the performance of python code, but not
in the proportion I expected :
"""
void print_occurence_of_unique_strings_compare_by_adresses(){
vector<string*> a;
string s1 = "What do you know?";
string s2 = "so long...";
string s3 = "chicken crosses road";
string s4 = "fool";
for (long int i=0; i<SIZE ; ++i){
a.push_back(&s1);
a.push_back(&s2);
a.push_back(&s3);
a.push_back(&s4);
}
set<string> b;
for (vector<string*>::iterator it=a.begin(); it!=a.end(); it++)
b.insert(**it);
copy(b.begin(), b.end(), ostream_iterator<string>(cout, "\n"));
}
"""
The problem here, is that the strings in the set are compared by value, which
is not optimal, and I guess python compare them by adress ("s*n is s*n" has
the same complexity than "s*n == s*n" in CPython, right ?).
so, finally, the code in c++, about ten times faster than the equivalent in
python, must be :
"""
void print_occurence_of_unique_strings_compared_by_address(){
cout << "print_occurence_of_unique_strings_compared_by_address" << endl;
vector<string*> a;
string s1 = "What do you know?";
string s2 = "so long...";
string s3 = "chicken crosses road";
string s4 = "fool";
for (long int i=0; i<SIZE ; ++i){
a.push_back(&s1);
a.push_back(&s2);
a.push_back(&s3);
a.push_back(&s4);
}
set<string*> b(a.begin(), a.end());
set<string> c; // well ordered set (b is ordered by address)
for (set<string*>::iterator it=b.begin(); it!=b.end(); it++)
c.insert(**it);
copy(c.begin(), c.end(), ostream_iterator<string>(cout, "\n"));
}
"""
the result on my box is :
[EMAIL PROTECTED] mar aoû 22 22:24:23:~$ g++ -O3 -o testcpp testcpp.cpp
[EMAIL PROTECTED] mar aoû 22 22:24:29:~$ ./testcpp
print_occurence_of_strings
What do you know?
chicken crosses road
fool
so long...
print_occurence_of_unique_strings
What do you know?
chicken crosses road
fool
so long...
print_occurence_of_unique_strings_compared_by_address
What do you know?
chicken crosses road
fool
so long...
strings : 1.89
unique strings : 0.87
compared by address : 0.18
[EMAIL PROTECTED] mar aoû 22 22:24:38:~$ python2.4 testpython.py
so long...
What do you know
fool
chicken crosses road
Elapsed: 1.680000 seconds
[EMAIL PROTECTED] mar aoû 22 22:24:51:~$ g++ -v
Using built-in specs.
Target: i486-linux-gnu
Configured
with: ../src/configure -v
--enable-languages=c,c++,java,fortran,objc,obj-c++,ada,treelang --prefix=/usr
--enable-shared --with-system-zlib --libexecdir=/usr/lib
--without-included-gettext --enable-threads=posix --enable-nls
--program-suffix=-4.1 --enable-__cxa_atexit --enable-clocale=gnu
--enable-libstdcxx-debug --enable-java-awt=gtk --enable-gtk-cairo
--with-java-home=/usr/lib/jvm/java-1.4.2-gcj-4.1-1.4.2.0/jre --enable-mpfr
--with-tune=i686 --enable-checking=release
i486-linux-gnu
Thread model: posix
gcc version 4.1.2 20060613 (prerelease) (Debian 4.1.1-5)
I've joined the full c++ file as an attachment.
--
_____________
Maric Michaud
_____________
Aristote - www.aristote.info
3 place des tapis
69004 Lyon
Tel: +33 426 880 097
#include <iostream>
#include <ostream>
#include <iterator>
#include <string>
#include <vector>
#include <set>
#include <algorithm>
#include <ctime>
using namespace std;
#define SIZE 1000000
void print_occurence_of_strings(){
cout << "print_occurence_of_strings" << endl;
vector<string> a;
const string& s1 = "What do you know?";
const string& s2 = "so long...";
const string& s3 = "chicken crosses road";
const string& s4 = "fool";
for (long int i=0; i<SIZE ; ++i){
a.push_back(s1);
a.push_back(s2);
a.push_back(s3);
a.push_back(s4);
}
set<string> b(a.begin(), a.end());
copy(b.begin(), b.end(), ostream_iterator<string>(cout, "\n"));
}
void print_occurence_of_unique_strings(){
cout << "print_occurence_of_unique_strings" << endl;
vector<string*> a;
string s1 = "What do you know?";
string s2 = "so long...";
string s3 = "chicken crosses road";
string s4 = "fool";
for (long int i=0; i<SIZE ; ++i){
a.push_back(&s1);
a.push_back(&s2);
a.push_back(&s3);
a.push_back(&s4);
}
set<string> b;
for (vector<string*>::iterator it=a.begin(); it!=a.end(); it++)
b.insert(**it);
copy(b.begin(), b.end(), ostream_iterator<string>(cout, "\n"));
}
void print_occurence_of_unique_strings_compared_by_address(){
cout << "print_occurence_of_unique_strings_compared_by_address" << endl;
vector<string*> a;
string s1 = "What do you know?";
string s2 = "so long...";
string s3 = "chicken crosses road";
string s4 = "fool";
for (long int i=0; i<SIZE ; ++i){
a.push_back(&s1);
a.push_back(&s2);
a.push_back(&s3);
a.push_back(&s4);
}
set<string*> b(a.begin(), a.end());
set<string> c; // well ordered set (b is ordered by address)
for (set<string*>::iterator it=b.begin(); it!=b.end(); it++)
c.insert(**it);
copy(c.begin(), c.end(), ostream_iterator<string>(cout, "\n"));
}
int main(){
clock_t f_start1,
f_end1,
f_start2,
f_end2,
f_start3,
f_end3;
f_start1 = clock();
print_occurence_of_strings();
f_end1 = clock();
f_start2 = clock();
print_occurence_of_unique_strings();
f_end2 = clock();
f_start3 = clock();
print_occurence_of_unique_strings_compared_by_address();
f_end3 = clock();
cout << "strings : " << (f_end1 - f_start1) / double(CLOCKS_PER_SEC) << endl;
cout << "unique strings : " << (f_end2 - f_start2) / double(CLOCKS_PER_SEC) << endl;
cout << "compared by address : " << (f_end3 - f_start3) / double(CLOCKS_PER_SEC) << endl;
}
--
http://mail.python.org/mailman/listinfo/python-list