Here's a C++ entry from Dave Smith who doesn't want to figure out how to
use mutt to send this.

Here are the CPU specs and the timings:

--$ cat /proc/cpuinfo
processor       : 0
vendor_id       : GenuineIntel
cpu family      : 15
model           : 3
model name      : Intel(R) Pentium(R) 4 CPU 2.80GHz
stepping        : 3
cpu MHz         : 2793.085
cache size      : 1024 KB
fdiv_bug        : no
hlt_bug         : no
f00f_bug        : no
coma_bug        : no
fpu             : yes
fpu_exception   : yes
cpuid level     : 5
wp              : yes
flags           : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm
bogomips        : 5570.56

--$ time ./word-count /tmp/kjv100 >/dev/null

real    0m3.533s
user    0m3.530s
sys     0m0.010s

-- 
Byron Clark
/**
 * Word counter.
 *
 * This program counts the number of times each dictionary word occurs in a file.
 * It was written with speed in mind, AND NOTHING ELSE, including error checking.
 * Do NOT use this code for anything but winning competitions.
 *
 * Author: Dave Smith
 */

#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <set>

// Path of the word list that main opens and loads; each of its non-empty
// lines becomes one histogram key.
#define DICTIONARY "/usr/share/dict/words"

// Selects the container behind WordHistogram: non-zero picks the hash_map
// variant, zero picks the std::map variant.
#define USE_HASH_MAP 1

using namespace std;

#if USE_HASH_MAP

#include <hash_map.h>
/**
 * Equality predicate for C-string keys: two keys are equal when strcmp
 * reports no difference between their characters, so lookups match on text
 * rather than on pointer value.
 */
struct eqstr
{
  bool operator()(const char* s1, const char* s2) const
  {
    const int difference = strcmp(s1, s2);
    return !difference;
  }
};
// Word -> occurrence count, stored in the hash_map container from
// <hash_map.h>. Keys are C-string pointers; eqstr compares them by content.
// NOTE(review): hash<const char*> is presumably the library specialization
// that hashes the characters rather than the pointer - confirm for the
// toolchain in use.
typedef hash_map<const char*, int, hash<const char*>, eqstr> WordHistogram;

#else

#include <map>
/**
 * Strict-weak-ordering predicate for C-string keys: s1 sorts before s2 when
 * strcmp reports it as lexicographically smaller, so keys are ordered by
 * text rather than by pointer value.
 */
struct ltstr
{
    bool operator()(const char* s1, const char* s2) const
    {
        const int ordering = strcmp(s1, s2);
        return ordering < 0;
    }
};
// Word -> occurrence count, stored in an ordered map. Keys are C-string
// pointers; ltstr orders them by string content instead of by address.
typedef map<char*, int, ltstr> WordHistogram;

#endif

// File-scope working state shared with main.

// The histogram: main seeds it with every dictionary word at count 0, then
// increments the count of each input token that is already a key.
WordHistogram wordCountMap;
// Buffer that receives one line read from the dictionary file.
char word[50];
// Buffer that receives one line read from the input file.
char line[1024];
// Points at the heap-allocated copy of a dictionary word that is stored as a
// histogram key.
char *heapWord;
// Length scratch variable main uses when trimming the end of a line it has
// just read.
int len;
// Iterator main uses to look up each input token in the histogram.
WordHistogram::iterator it;

int main(int argc, char** argv)
{
    FILE *dictFile = fopen( DICTIONARY, "r" );
    FILE *wordFile = fopen( argv[1], "r" );

    // Store each dictionary word in the histogram with a value of 0.
    while( fgets( word, 49, dictFile ) )
    {
        len = strlen( word );
        word[--len] = 0; // Chop off the new-line.
        if( len > 0 )
        {
            heapWord = new char[ 50 ];
            strcpy( heapWord, word );
            wordCountMap[heapWord] = 0;
        }
    }

    // Read in each word from each line in the input file:
    while( fgets( line, 1024, wordFile ) )
    {
        len = strlen( line );
        line[--len] = 0;
        char *lineWord = strtok( line, ",[EMAIL PROTECTED]&*()_ " ); // Unsafe, stupid, but fast.
        do
        {
            if( lineWord != NULL )
            {
                // Is this a dictionary word?
                it = wordCountMap.find( lineWord );
                if( it != wordCountMap.end() )
                {
                    (*it).second++;
                }
            }
        }
        while( (lineWord = strtok( NULL, " " )) != NULL );
    }

    // Print the results:
    for( WordHistogram::iterator it = wordCountMap.begin(); it != wordCountMap.end(); it++ )
    {
        if( (*it).second > 0 )
            printf( "%s: %d\n", (*it).first, (*it).second );
    }

    return 0;
}

Attachment: signature.asc
Description: Digital signature

/*
PLUG: http://plug.org, #utah on irc.freenode.net
Unsubscribe: http://plug.org/mailman/options/plug
Don't fear the penguin.
*/

Reply via email to