Changeset: 802c6a1cd0ba for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=802c6a1cd0ba
Modified Files:
        monetdb5/extras/rdf/hashmap/hashmap.c
        monetdb5/extras/rdf/hashmap/hashmap.h
        monetdb5/extras/rdf/rdf_shredder.mx
        monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

Add function for checking the number of frequent CSs


diffs (213 lines):

diff --git a/monetdb5/extras/rdf/hashmap/hashmap.c b/monetdb5/extras/rdf/hashmap/hashmap.c
--- a/monetdb5/extras/rdf/hashmap/hashmap.c
+++ b/monetdb5/extras/rdf/hashmap/hashmap.c
@@ -10,22 +10,7 @@
 #define INITIAL_SIZE (256)
 #define MAX_CHAIN_LENGTH (8)
 
-/* We need to keep keys and values */
-typedef struct _hashmap_element{
-       int* key;
-       int num; 
-       int freq;       
-       int in_use;
-       any_t data;
-} hashmap_element;
 
-/* A hashmap has some maximum size and current size,
- * as well as the data to hold. */
-typedef struct _hashmap_map{
-       int table_size;
-       int size;
-       hashmap_element *data;
-} hashmap_map;
 
 /*
  * Return an empty hashmap, or NULL on failure.
@@ -256,10 +241,11 @@ int hashmap_iterate(map_t in, PFany f, a
 int hashmap_iterate_threshold(map_t in, int freqthreshold){
 
        int i;
+       int count = 0; 
 
        /* Cast the hashmap */
        hashmap_map* m = (hashmap_map*) in;
-
+       
        /* On empty hashmap, return immediately */
        if (hashmap_length(m) <= 0)
                return MAP_MISSING;     
@@ -269,11 +255,11 @@ int hashmap_iterate_threshold(map_t in, 
                if(m->data[i].in_use != 0) {
                        if (m->data[i].freq > freqthreshold){
                                //any_t data = (any_t) (m->data[i].data);
-                               
+                               count++; 
                        }
                }
 
-    return MAP_OK;
+    return count;
 }
 
 /*
diff --git a/monetdb5/extras/rdf/hashmap/hashmap.h b/monetdb5/extras/rdf/hashmap/hashmap.h
--- a/monetdb5/extras/rdf/hashmap/hashmap.h
+++ b/monetdb5/extras/rdf/hashmap/hashmap.h
@@ -36,6 +36,23 @@ typedef int (*PFany)(any_t, any_t);
  */
 typedef any_t map_t;
 
+/* We need to keep keys and values */
+typedef struct _hashmap_element{
+       int* key;
+       int num; 
+       int freq;       
+       int in_use;
+       any_t data;
+} hashmap_element;
+
+/* A hashmap has some maximum size and current size,
+ * as well as the data to hold. */
+typedef struct _hashmap_map{
+       int table_size;
+       int size;
+       hashmap_element *data;
+} hashmap_map;
+
 /*
  * Return an empty hashmap. Returns NULL if empty.
 */
diff --git a/monetdb5/extras/rdf/rdf_shredder.mx b/monetdb5/extras/rdf/rdf_shredder.mx
--- a/monetdb5/extras/rdf/rdf_shredder.mx
+++ b/monetdb5/extras/rdf/rdf_shredder.mx
@@ -563,7 +563,7 @@ RDFParser (BAT **graph, str *location, s
        @:set_handlers(world, rparser, pdata)@
 
        //raptor_parser_set_option(rparser, 0); //MDPHAM: CHECK FOR THIS SETTING
-       raptor_parser_set_option(rparser, RAPTOR_OPTION_SCANNING);
+       //raptor_parser_set_option(rparser, RAPTOR_OPTION_SCANNING);
 
        /* Parse URI or local file. */
        ret = URLisaURL(&isURI, location);
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -48,6 +48,11 @@ static void putCStoHash(map_t csmap, int
        int     freq; 
 
        cs = (int*) malloc(sizeof(int) * num);
+       if (cs==NULL){
+               printf("Malloc failed. at %d", num);
+               exit(-1); 
+       }
+
        copyIntSet(cs, buff, num); 
        if (hashmap_get(csmap, cs, num,(void**)(&getCSoid),1, &freq) != MAP_OK){
                putCSoid = malloc(sizeof(oid)); 
@@ -56,22 +61,30 @@ static void putCStoHash(map_t csmap, int
                err = hashmap_put(csmap, cs, num, putCSoid);    
                assert(err == MAP_OK); 
                                
-               printf("Put CS %d into hashmap \n", (int) *putCSoid);
+               //printf("Put CS %d into hashmap \n", (int) *putCSoid);
 
                (*csoid)++; 
        }
        else{
-               printf("The key %d exists in the hashmap with freq %d \n", (int) *getCSoid, freq);
+               //printf("The key %d exists in the hashmap with freq %d \n", (int) *getCSoid, freq);
                free(cs); 
 
        }
 }
 
-/*
-static void getTopFreqCSs(map_t csmap){
-       
+
+static void getTopFreqCSs(map_t csmap, int threshold){
+       int count;
+       hashmap_map* m; 
+       count = hashmap_iterate_threshold(csmap, threshold); 
+       m = (hashmap_map *) csmap;
+       printf("Threshold: %d \n ", threshold);
+       printf("Number of frequent CSs %d / Number of CSs %d (Table size: %d) \n" , count, m->size, m->table_size);
+
+       return;
+
 }
-*/
+
 
 str
 RDFextractCS(int *ret, bat *sbatid, bat *pbatid){
@@ -80,11 +93,12 @@ RDFextractCS(int *ret, bat *sbatid, bat 
        BATiter si, pi;         /*iterator for BAT of s,p columns in spo table */
        oid     *bt, *pbt; 
        oid     curS;           /* current Subject oid */
+       oid     curP;           /* current Property oid */
        oid     CSoid = 0;      /* Characteristic set oid */
        int     numP;           /* Number of properties for current S */
        map_t   csMap; 
        int*    buff;    
-       int     INIT_PROPERTY_NUM = 256; 
+       int     INIT_PROPERTY_NUM = 50000; 
 
        buff = (int *) malloc (sizeof(int) * INIT_PROPERTY_NUM);
        
@@ -100,27 +114,50 @@ RDFextractCS(int *ret, bat *sbatid, bat 
 
        /* Init a hashmap */
        csMap = hashmap_new(); 
-       numP = 0; 
+       numP = 0;
+       curP = 0; 
 
        BATloop(sbat, p, q){
                bt = (oid *) BUNtloc(si, p);            
                if (*bt != curS){
                        if (p != 0){    /* Not the first S */
                                putCStoHash(csMap, buff, numP, &CSoid); 
+
                        }
                        curS = *bt; 
+                       curP = 0;
                        numP = 0;
                }
                                
                pbt = (oid *) BUNtloc(pi, p); 
-               buff[numP] = *pbt; 
-               numP++; 
-               printf("Travel sbat at %d  value: %d , for pbat: %d \n", (int) p, (int) *bt, (int) *pbt);
+
+               if (numP > INIT_PROPERTY_NUM){
+                       printf("# of properties %d is greater than INIT_PROPERTY_NUM at CS %d property %d \n", numP, (int)CSoid, (int)*pbt);
+                       exit(-1);
+               }
+               
+               if (curP != *pbt){
+                       buff[numP] = *pbt; 
+                       numP++; 
+                       curP = *pbt; 
+               }
+               //printf("Travel sbat at %d  value: %d , for pbat: %d \n", (int) p, (int) *bt, (int) *pbt);
        }
        
        /*put the last CS */
        putCStoHash(csMap, buff, numP, &CSoid); 
 
+       /*get the statistic */
+
+       getTopFreqCSs(csMap,20);
+
+       getTopFreqCSs(csMap,10);
+
+       getTopFreqCSs(csMap,5);
+
+       getTopFreqCSs(csMap,2);
+
+
        BBPreclaim(sbat); 
        BBPreclaim(pbat); 
 
_______________________________________________
checkin-list mailing list
[email protected]
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to