Here are the results with the 43 MB kjv10:
Python with psyco
real    0m6.148s
user    0m5.732s
sys     0m0.096s

real    0m5.818s
user    0m5.572s
sys     0m0.052s

real    0m6.266s
user    0m5.976s
sys     0m0.060s

real    0m5.831s
user    0m5.544s
sys     0m0.088s

C++ stlport
real    0m3.152s
user    0m2.960s
sys     0m0.036s

real    0m3.081s
user    0m2.924s
sys     0m0.024s

real    0m3.395s
user    0m3.264s
sys     0m0.012s

real    0m3.509s
user    0m3.304s
sys     0m0.028s

Derek wrote:

AMD Athlon(tm) 64 Processor 2800+ 2GB Ram

psyco version -
real    0m0.711s
user    0m0.684s
sys     0m0.024s

real    0m0.740s
user    0m0.696s
sys     0m0.008s

real    0m0.679s
user    0m0.644s
sys     0m0.036s

real    0m0.693s
user    0m0.668s
sys     0m0.028s

gcc version 3.3.6 with stlport and -O4 http://sourceforge.net/projects/stlport
real    0m0.467s
user    0m0.444s
sys     0m0.024s

real    0m0.464s
user    0m0.448s
sys     0m0.008s

real    0m0.527s
user    0m0.472s
sys     0m0.016s

real    0m0.529s
user    0m0.484s
sys     0m0.004s

#include <stlport/fstream>
#include <stlport/iostream>
#include <stlport/hash_map>
#include <stlport/string>

using namespace stlport;

string dictionary("/usr/share/dict/words");

int main(int argc, char** argv) {
      hash_map<string, int> words;
      hash_map<string, int> count;
      ifstream w(argv[1]);
      ifstream d(dictionary.c_str());
      string word;

      while (w >> word) {
              words[word]++;
      }

      hash_map<string, int>::iterator i;
      while (d >> word) {
              i = words.find(word);
              if (i != words.end()) {
                      count[word] = (*i).second;
              }
      }

      for (i = count.begin(); i != count.end(); i++)
      {
              cout << (*i).first << ' ' << (*i).second << endl;
      }
      return 0;
};


Shane Hathaway wrote:

Shane Hathaway wrote:

Thanks, but the credit goes to the people who have optimized Python dictionaries so well. Sometimes I wish such optimized code were available in plain C/C++.

There's still more fine tuning possible. I've attached a version that shaves another second from the kjv100 test.



Then, install psyco (http://psyco.sourceforge.net/) and run the attached script. I measure 4.9 seconds, which could put the Python + Psyco version in 1st place, at least for a while. :-)

Shane

------------------------------------------------------------------------

#!/usr/bin/python2.4

import sys

def main():
   """Print how often each known word occurs in the input text.

   The vocabulary is read from 'words.i' (one word per line) in the current
   directory.  The text comes from the file named by sys.argv[1], or from
   standard input when no argument is given.  One "word count" line is
   written to standard output for every vocabulary word that occurs at
   least once; the order of the lines is unspecified.
   """
   words_fname = 'words.i'
   if len(sys.argv) > 1:
       source = open(sys.argv[1])
   else:
       source = sys.stdin

   # A dict doubles as the vocabulary set; only key membership matters.
   words = {}
   words_file = open(words_fname)
   for line in words_file:
       words[line.rstrip()] = 0
   words_file.close()

   freq = {}
   for line in source:
       for word in line.split():
           # Check freq first: a word already counted is known to be in the
           # vocabulary, so repeated words cost a single lookup.
           if word in freq:
               freq[word] += 1
           elif word in words:
               freq[word] = 1

   # Close only what this function opened; leave standard input alone.
   if source is not sys.stdin:
       source.close()

   # Parenthesised single-argument print and items() give the same output
   # on Python 2 and also run on Python 3.
   for word, count in freq.items():
       print("%s %d" % (word, count))

# Psyco is imported only after main() has been defined so that the function
# can be handed to psyco.bind() before its single call below.
# NOTE(review): psyco.bind presumably asks Psyco to compile main() for
# speed, as the surrounding message suggests - confirm against Psyco docs.
import psyco
psyco.bind(main)

# Run the word count once, at import/script time.
main()
------------------------------------------------------------------------


/*
PLUG: http://plug.org, #utah on irc.freenode.net
Unsubscribe: http://plug.org/mailman/options/plug
Don't fear the penguin.
*/


/*
PLUG: http://plug.org, #utah on irc.freenode.net
Unsubscribe: http://plug.org/mailman/options/plug
Don't fear the penguin.
*/


/*
PLUG: http://plug.org, #utah on irc.freenode.net
Unsubscribe: http://plug.org/mailman/options/plug
Don't fear the penguin.
*/

Reply via email to