Here's a C++ entry from Dave Smith, who doesn't want to figure out how to use mutt to send this.
Here are the CPU specs and the timings:
--
$ cat /proc/cpuinfo
processor : 0
vendor_id : GenuineIntel
cpu family : 15
model : 3
model name : Intel(R) Pentium(R) 4 CPU 2.80GHz
stepping : 3
cpu MHz : 2793.085
cache size : 1024 KB
fdiv_bug : no
hlt_bug : no
f00f_bug : no
coma_bug : no
fpu : yes
fpu_exception : yes
cpuid level : 5
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm
bogomips : 5570.56
--
$ time ./word-count /tmp/kjv100 >/dev/null
real 0m3.533s
user 0m3.530s
sys 0m0.010s
--
Byron Clark
/**
* Word counter.
*
* This program counts the number of times each dictionary word occurs in a file.
* It was written with speed in mind, AND NOTHING ELSE, including error checking.
* Do NOT use this code for anything but winning competitions.
*
* Author: Dave Smith
*/
#include <cstdio>
#include <cstring>
#include <fstream>
#include <iostream>
#include <set>
#define DICTIONARY "/usr/share/dict/words"
#define USE_HASH_MAP 1
using namespace std;
#if USE_HASH_MAP
#include <hash_map.h>
// Equality predicate for C-string keys: two keys are equal when their
// characters are equal, whichever buffers the pointers refer to.
struct eqstr
{
    bool operator()(const char* lhs, const char* rhs) const
    {
        const int difference = strcmp(lhs, rhs);
        return !difference;
    }
};
// Histogram type used when USE_HASH_MAP is non-zero: C-string key -> count,
// with eqstr deciding key equality by string contents.
// NOTE(review): this relies on the hash_map.h extension's hash<const char*>
// hashing the characters rather than the pointer value - confirm for the
// toolchain in use.
typedef hash_map<const char*, int, hash<const char*>, eqstr> WordHistogram;
#else
#include <map>
// Strict-weak ordering for C-string keys: orders by character contents
// (as strcmp does), not by pointer value.
struct ltstr
{
    bool operator()(const char* lhs, const char* rhs) const
    {
        const int order = strcmp(lhs, rhs);
        return 0 > order;
    }
};
// Histogram type used when USE_HASH_MAP is zero: C-string key -> count,
// ordered by ltstr (string contents). The key is const char* so that both
// WordHistogram variants expose the same key type and the map never hands
// out a mutable pointer to a stored key.
typedef map<const char*, int, ltstr> WordHistogram;
#endif
// Dictionary word -> number of times that word was seen in the input file.
WordHistogram wordCountMap;
// Buffer that receives one line (one word) of the dictionary file.
char word[50];
// Buffer that receives one line of the input file.
char line[1024];
// Heap-allocated copy of the dictionary word being inserted; the pointer
// itself becomes the map key.
char *heapWord;
// Length of the string currently held in word or line.
int len;
// Result of looking up the current input word in wordCountMap.
WordHistogram::iterator it;
int main(int argc, char** argv)
{
FILE *dictFile = fopen( DICTIONARY, "r" );
FILE *wordFile = fopen( argv[1], "r" );
// Store each dictionary word in the histogram with a value of 0.
while( fgets( word, 49, dictFile ) )
{
len = strlen( word );
word[--len] = 0; // Chop off the new-line.
if( len > 0 )
{
heapWord = new char[ 50 ];
strcpy( heapWord, word );
wordCountMap[heapWord] = 0;
}
}
// Read in each word from each line in the input file:
while( fgets( line, 1024, wordFile ) )
{
len = strlen( line );
line[--len] = 0;
char *lineWord = strtok( line, ",[EMAIL PROTECTED]&*()_ " ); // Unsafe, stupid, but fast.
do
{
if( lineWord != NULL )
{
// Is this a dictionary word?
it = wordCountMap.find( lineWord );
if( it != wordCountMap.end() )
{
(*it).second++;
}
}
}
while( (lineWord = strtok( NULL, " " )) != NULL );
}
// Print the results:
for( WordHistogram::iterator it = wordCountMap.begin(); it != wordCountMap.end(); it++ )
{
if( (*it).second > 0 )
printf( "%s: %d\n", (*it).first, (*it).second );
}
return 0;
}
signature.asc
Description: Digital signature
/* PLUG: http://plug.org, #utah on irc.freenode.net Unsubscribe: http://plug.org/mailman/options/plug Don't fear the penguin. */
