Well, it's pretty fast so far. I ran it against a file with over 55 million integers, and it took only a little over 13 seconds to find the 12 unique integers on a 2.2 GHz machine with a 4 MB cache.
br...@condios:~/school/Project/practice$ time ./hashtest3 Size of hash table is 12 records read from file 55136256 First list Key is 1334, value is 1334 First list Key is 1, value is 1 First list Key is 4, value is 4 First list Key is 12222221, value is 12222221 First list Key is 32, value is 32 First list Key is 122334, value is 122334 First list Key is 412, value is 412 First list Key is 12, value is 12 First list Key is 43, value is 43 First list Key is 334, value is 334 First list Key is 56, value is 56 First list Key is 12334, value is 12334 real 0m13.187s user 0m13.120s sys 0m0.060s On Sat, Feb 13, 2010 at 05:36:34PM -0800, Brian Lavender wrote: > I figured it out! When I created the hash with g_hash_table_new_full, > I provided NULL as the hash function. I changed this to g_int_hash, > a built-in function in glib that hashes a pointer to an integer. > > I almost wonder if I wrote my own hash if it would have been faster? > > I also found a decent article on IBM regarding glib. > http://www.ibm.com/developerworks/linux/library/l-glib2.html > > brian > > > On Fri, Feb 12, 2010 at 12:24:10AM -0800, Brian Lavender wrote: > > I am trying to eliminate duplicates, so I figured I would just use a > > hash from libglib-2.0. It worked fine with static pointers (or whatever > > you call that portion of code) > > > > Problem is, once I try it with dynamically allocated pointers, it > > doesn't eliminate any duplicates. > > > > Anything obvious that I am missing? > > > > Compile with the following. > > > > gcc `pkg-config glib-2.0 --cflags` read_hash.c -o read_hash > > > > -- > Brian Lavender > http://www.brie.com/brian/ > > "About 3 million computers get sold every year in China, but people don't > pay for the software. Someday they will, though. As long as they are going > to steal it, we want them to steal ours. They'll get sort of addicted, and > then we'll somehow figure out how to collect sometime in the next decade." 
> > -- Bill Gates (Microsoft) 1998 > #include <stdio.h> > #include <string.h> > #include <glib.h> > > #define BUF_SZ 11 > #define NUM_HTABLES 9 > > gboolean compare(gint *a, gint *b) > { > gboolean rvalue; > if (*a == *b) > rvalue = TRUE; > else > rvalue = FALSE; > g_printf("The test ran! a: %d b: %d\n",*a,*b); > return rvalue; > } > > void destroy(gpointer foo) { > g_slice_free1(sizeof(int),foo); > } > > void printeach(gpointer a, gpointer b, gpointer userdata) { > > g_printf("%s Key is %d, value is %d\n",(char *)userdata,*(int *)a, *(int > *)b); > } > > > int savestat( GHashTable *myHTable[NUM_HTABLES], > char *myFile) { > > int i; > FILE *input; > gchar *rd_buf; > gint *c, *d; > int *old_key, *old_value; > int tmpInt; > int nchars; > int cur_sz; > int count=0; > > input = fopen(myFile, "r"); > if (input == NULL) > { > perror("Failed to open file"); > return -1; > } > // buffer for reading input > rd_buf = (gchar *) g_slice_alloc0( BUF_SZ * sizeof(char) ); > > nchars = getline(&rd_buf, &cur_sz, input); > > while ( nchars != -1 ) { > if ( rd_buf[nchars-1] == '\n') > rd_buf[nchars-1] = '\0'; > > tmpInt = atoi(rd_buf); > > c = (int *)g_slice_alloc0( sizeof(int) ); > d = (int *)g_slice_alloc0( sizeof(int) ); > g_memmove(c, &tmpInt,sizeof(int)); > g_memmove(d, &tmpInt,sizeof(int)); > > /* if( g_hash_table_lookup_extended(myHTable[0], c,(gpointer) > &old_key,(gpointer ) &old_value ) ) { */ > /* g_hash_table_insert(myHTable[0], c, d); */ > /* g_free(old_key); */ > /* g_free(old_value); */ > /* } else { */ > count++; > g_hash_table_insert(myHTable[0], c, d); > /* } */ > > nchars = getline(&rd_buf, &cur_sz, input); > } > > close(myFile); > return count; > } > > > > int main() { > > GHashTable *myHTable[NUM_HTABLES]; > > int nrecords; > int i; > char *sf = "myints.txt"; > > guint size; > gboolean rVal; > > > myHTable[0] = g_hash_table_new_full(g_int_hash,(GEqualFunc)compare, > (GDestroyNotify)destroy, > (GDestroyNotify)destroy); > > // We can pass by copy > nrecords = 
savestat( myHTable, sf); > > size = g_hash_table_size(myHTable[0]); > g_printf("Size of hash table is %d\n",size); > g_printf("records read from file %d\n",nrecords); > > > g_hash_table_foreach(myHTable[0], printeach, "First list"); > > return 0; > } > 334 > 1 > 4 > 56 > 32 > 12 > 1 > 4 > 56 > 56 > 12 > _______________________________________________ > vox-tech mailing list > [email protected] > http://lists.lugod.org/mailman/listinfo/vox-tech -- Brian Lavender http://www.brie.com/brian/ "About 3 million computers get sold every year in China, but people don't pay for the software. Someday they will, though. As long as they are going to steal it, we want them to steal ours. They'll get sort of addicted, and then we'll somehow figure out how to collect sometime in the next decade." -- Bill Gates (Microsoft) 1998 _______________________________________________ vox-tech mailing list [email protected] http://lists.lugod.org/mailman/listinfo/vox-tech
