AMD Athlon(tm) 64 Processor 2800+, 2 GB RAM

psyco version -
real    0m0.711s
user    0m0.684s
sys     0m0.024s

real    0m0.740s
user    0m0.696s
sys     0m0.008s

real    0m0.679s
user    0m0.644s
sys     0m0.036s

real    0m0.693s
user    0m0.668s
sys     0m0.028s

gcc version 3.3.6 with STLport (http://sourceforge.net/projects/stlport) and -O4 -
real    0m0.467s
user    0m0.444s
sys     0m0.024s

real    0m0.464s
user    0m0.448s
sys     0m0.008s

real    0m0.527s
user    0m0.472s
sys     0m0.016s

real    0m0.529s
user    0m0.484s
sys     0m0.004s

#include <stlport/fstream>
#include <stlport/iostream>
#include <stlport/hash_map>
#include <stlport/string>

using namespace stlport;

// Path of the word list that main() opens and reads word by word.
string dictionary("/usr/share/dict/words");

int main(int argc, char** argv) {
      hash_map<string, int> words;
      hash_map<string, int> count;
      ifstream w(argv[1]);
      ifstream d(dictionary.c_str());
      string word;

      while (w >> word) {
              words[word]++;
      }

      hash_map<string, int>::iterator i;
      while (d >> word) {
              i = words.find(word);
              if (i != words.end()) {
                      count[word] = (*i).second;
              }
      }

      for (i = count.begin(); i != count.end(); i++)
      {
              cout << (*i).first << ' ' << (*i).second << endl;
      }
      return 0;
};


Shane Hathaway wrote:

Shane Hathaway wrote:

Thanks, but the credit goes to the people who have optimized Python dictionaries so well. Sometimes I wish such optimized code were available in plain C/C++.

There's still more fine tuning possible. I've attached a version that shaves another second from the kjv100 test.


Then, install psyco (http://psyco.sourceforge.net/) and run the attached script. I measure 4.9 seconds, which could put the Python + Psyco version in 1st place, at least for a while. :-)

Shane

------------------------------------------------------------------------

#!/usr/bin/python2.4

import sys

def main():
   """Print the frequency of every known word found in the source text.

   The word list is read from the file 'words.i', one entry per line.
   The text is read from the file named by sys.argv[1] when an argument
   is given, otherwise from standard input.  Each whitespace-separated
   token of the text that is present in the word list is counted, and
   one "word count" line is printed per counted word, in dictionary
   iteration order.
   """
   words_fname = 'words.i'
   if len(sys.argv) > 1:
       source = open(sys.argv[1])
   else:
       source = sys.stdin

   # Only the keys matter here: the dict serves as a membership set
   # (every value is 0 and is never read).
   words = {}
   for line in open(words_fname):
       words[line.rstrip()] = 0

   freq = {}
   for line in source:
       for word in line.split():
           # A word already in freq is known to be in the word list, so
           # the second membership test runs only for words not yet counted.
           if word in freq:
               freq[word] += 1
           elif word in words:
               freq[word] = 1

   for word, count in freq.iteritems():
       print word, count

# psyco is a third-party module (http://psyco.sourceforge.net/) that must be
# installed separately.  main is passed to psyco.bind() before it is called;
# presumably this is what lets psyco compile the function (confirm against
# the psyco documentation).
import psyco
psyco.bind(main)

main()
------------------------------------------------------------------------


/*
PLUG: http://plug.org, #utah on irc.freenode.net
Unsubscribe: http://plug.org/mailman/options/plug
Don't fear the penguin.
*/


/*
PLUG: http://plug.org, #utah on irc.freenode.net
Unsubscribe: http://plug.org/mailman/options/plug
Don't fear the penguin.
*/

Reply via email to